author     Christian König <christian.koenig@amd.com>    2013-08-13 05:56:54 -0400
committer  Alex Deucher <alexander.deucher@amd.com>      2013-08-30 16:30:42 -0400
commit     2483b4ea982efe8a544697d3f9642932e9af4dc1 (patch)
tree       f739e1b55b5e200817c174d4eae6f22935d152bf
parent     e409b128625732926c112cc9b709fb7bb1aa387f (diff)
drm/radeon: separate DMA code
Similar to separating the UVD code, just put the DMA functions into
separate files.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--  drivers/gpu/drm/radeon/Makefile            9
-rw-r--r--  drivers/gpu/drm/radeon/cik.c             736
-rw-r--r--  drivers/gpu/drm/radeon/cik_sdma.c        785
-rw-r--r--  drivers/gpu/drm/radeon/evergreen.c       161
-rw-r--r--  drivers/gpu/drm/radeon/evergreen_dma.c   190
-rw-r--r--  drivers/gpu/drm/radeon/ni.c              293
-rw-r--r--  drivers/gpu/drm/radeon/ni_dma.c          338
-rw-r--r--  drivers/gpu/drm/radeon/r600.c            473
-rw-r--r--  drivers/gpu/drm/radeon/r600_dma.c        497
-rw-r--r--  drivers/gpu/drm/radeon/rv770.c            74
-rw-r--r--  drivers/gpu/drm/radeon/rv770_dma.c       101
-rw-r--r--  drivers/gpu/drm/radeon/si.c              188
-rw-r--r--  drivers/gpu/drm/radeon/si_dma.c          235
13 files changed, 2181 insertions(+), 1899 deletions(-)
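
The change is mechanical: DMA helpers that used to be static inside the large per-ASIC files now live in their own *_dma.c files, are made non-static, and the original files keep extern prototypes so existing call sites still compile and link unchanged. Below is a minimal sketch of that pattern; the names (my_device, my_dma_resume, my_asic_startup) are made up for illustration and are not the actual radeon symbols.

    struct my_device { int id; };

    /* new file my_dma.c: the body moves here and drops its "static" */
    int my_dma_resume(struct my_device *dev)
    {
            /* program and start the DMA engine for this device */
            return dev ? 0 : -1;
    }

    /* original file my_asic.c: only a prototype of the moved helper remains */
    extern int my_dma_resume(struct my_device *dev);

    int my_asic_startup(struct my_device *dev)
    {
            /* the call site is untouched; the symbol now resolves at link time */
            return my_dma_resume(dev);
    }

In the patch itself the prototypes are added directly at the top of cik.c (the extern cik_sdma_* declarations in the first cik.c hunk below); collecting them in a shared header such as radeon_asic.h, which cik_sdma.c already includes, would be an equivalent alternative.
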
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 1e23b18d549a..da2a8e9e9308 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -82,6 +82,15 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
82 82	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
83 83	ci_dpm.o
84 84
85# add async DMA block
86radeon-y += \
87 r600_dma.o \
88 rv770_dma.o \
89 evergreen_dma.o \
90 ni_dma.o \
91 si_dma.o \
92 cik_sdma.o \
93
85 94  # add UVD block
86 95  radeon-y += \
87 96  	radeon_uvd.o \
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 1400b5203db1..692e31b95d34 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -64,6 +64,14 @@ extern int sumo_rlc_init(struct radeon_device *rdev);
64 64  extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
65 65  extern void si_rlc_reset(struct radeon_device *rdev);
66 66  extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
67extern int cik_sdma_resume(struct radeon_device *rdev);
68extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
69extern void cik_sdma_fini(struct radeon_device *rdev);
70extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
71 struct radeon_ib *ib,
72 uint64_t pe,
73 uint64_t addr, unsigned count,
74 uint32_t incr, uint32_t flags);
67 75  static void cik_rlc_stop(struct radeon_device *rdev);
68 76  static void cik_pcie_gen3_enable(struct radeon_device *rdev);
69 77  static void cik_program_aspm(struct radeon_device *rdev);
@@ -3987,579 +3995,6 @@ static int cik_cp_resume(struct radeon_device *rdev)
3987 3995  	return 0;
3988 3996  }
3989 3997
3990/*
3991 * sDMA - System DMA
3992 * Starting with CIK, the GPU has new asynchronous
3993 * DMA engines. These engines are used for compute
3994 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3995 * and each one supports 1 ring buffer used for gfx
3996 * and 2 queues used for compute.
3997 *
3998 * The programming model is very similar to the CP
3999 * (ring buffer, IBs, etc.), but sDMA has its own
4000 * packet format that is different from the PM4 format
4001 * used by the CP. sDMA supports copying data, writing
4002 * embedded data, solid fills, and a number of other
4003 * things. It also has support for tiling/detiling of
4004 * buffers.
4005 */
4006/**
4007 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
4008 *
4009 * @rdev: radeon_device pointer
4010 * @ib: IB object to schedule
4011 *
4012 * Schedule an IB in the DMA ring (CIK).
4013 */
4014void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
4015 struct radeon_ib *ib)
4016{
4017 struct radeon_ring *ring = &rdev->ring[ib->ring];
4018 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
4019
4020 if (rdev->wb.enabled) {
4021 u32 next_rptr = ring->wptr + 5;
4022 while ((next_rptr & 7) != 4)
4023 next_rptr++;
4024 next_rptr += 4;
4025 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
4026 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4027 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4028 radeon_ring_write(ring, 1); /* number of DWs to follow */
4029 radeon_ring_write(ring, next_rptr);
4030 }
4031
4032	/* IB packet must end on an 8 DW boundary */
4033 while ((ring->wptr & 7) != 4)
4034 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4035 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
4036 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
4037 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
4038 radeon_ring_write(ring, ib->length_dw);
4039
4040}
4041
4042/**
4043 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
4044 *
4045 * @rdev: radeon_device pointer
4046 * @fence: radeon fence object
4047 *
4048 * Add a DMA fence packet to the ring to write
4049 * the fence seq number and DMA trap packet to generate
4050 * an interrupt if needed (CIK).
4051 */
4052void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
4053 struct radeon_fence *fence)
4054{
4055 struct radeon_ring *ring = &rdev->ring[fence->ring];
4056 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
4057 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4058 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4059 u32 ref_and_mask;
4060
4061 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
4062 ref_and_mask = SDMA0;
4063 else
4064 ref_and_mask = SDMA1;
4065
4066 /* write the fence */
4067 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
4068 radeon_ring_write(ring, addr & 0xffffffff);
4069 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4070 radeon_ring_write(ring, fence->seq);
4071 /* generate an interrupt */
4072 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
4073 /* flush HDP */
4074 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4075 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4076 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4077 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4078 radeon_ring_write(ring, ref_and_mask); /* MASK */
4079 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4080}
4081
4082/**
4083 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
4084 *
4085 * @rdev: radeon_device pointer
4086 * @ring: radeon_ring structure holding ring information
4087 * @semaphore: radeon semaphore object
4088 * @emit_wait: wait or signal semaphore
4089 *
4090 * Add a DMA semaphore packet to the ring to wait on or signal
4091 * other rings (CIK).
4092 */
4093void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
4094 struct radeon_ring *ring,
4095 struct radeon_semaphore *semaphore,
4096 bool emit_wait)
4097{
4098 u64 addr = semaphore->gpu_addr;
4099 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
4100
4101 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
4102 radeon_ring_write(ring, addr & 0xfffffff8);
4103 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4104}
4105
4106/**
4107 * cik_sdma_gfx_stop - stop the gfx async dma engines
4108 *
4109 * @rdev: radeon_device pointer
4110 *
4111 * Stop the gfx async dma ring buffers (CIK).
4112 */
4113static void cik_sdma_gfx_stop(struct radeon_device *rdev)
4114{
4115 u32 rb_cntl, reg_offset;
4116 int i;
4117
4118 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4119
4120 for (i = 0; i < 2; i++) {
4121 if (i == 0)
4122 reg_offset = SDMA0_REGISTER_OFFSET;
4123 else
4124 reg_offset = SDMA1_REGISTER_OFFSET;
4125 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
4126 rb_cntl &= ~SDMA_RB_ENABLE;
4127 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4128 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
4129 }
4130}
4131
4132/**
4133 * cik_sdma_rlc_stop - stop the compute async dma engines
4134 *
4135 * @rdev: radeon_device pointer
4136 *
4137 * Stop the compute async dma queues (CIK).
4138 */
4139static void cik_sdma_rlc_stop(struct radeon_device *rdev)
4140{
4141 /* XXX todo */
4142}
4143
4144/**
4145 * cik_sdma_enable - stop the async dma engines
4146 *
4147 * @rdev: radeon_device pointer
4148 * @enable: enable/disable the DMA MEs.
4149 *
4150 * Halt or unhalt the async dma engines (CIK).
4151 */
4152static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
4153{
4154 u32 me_cntl, reg_offset;
4155 int i;
4156
4157 for (i = 0; i < 2; i++) {
4158 if (i == 0)
4159 reg_offset = SDMA0_REGISTER_OFFSET;
4160 else
4161 reg_offset = SDMA1_REGISTER_OFFSET;
4162 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
4163 if (enable)
4164 me_cntl &= ~SDMA_HALT;
4165 else
4166 me_cntl |= SDMA_HALT;
4167 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
4168 }
4169}
4170
4171/**
4172 * cik_sdma_gfx_resume - setup and start the async dma engines
4173 *
4174 * @rdev: radeon_device pointer
4175 *
4176 * Set up the gfx DMA ring buffers and enable them (CIK).
4177 * Returns 0 for success, error for failure.
4178 */
4179static int cik_sdma_gfx_resume(struct radeon_device *rdev)
4180{
4181 struct radeon_ring *ring;
4182 u32 rb_cntl, ib_cntl;
4183 u32 rb_bufsz;
4184 u32 reg_offset, wb_offset;
4185 int i, r;
4186
4187 for (i = 0; i < 2; i++) {
4188 if (i == 0) {
4189 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4190 reg_offset = SDMA0_REGISTER_OFFSET;
4191 wb_offset = R600_WB_DMA_RPTR_OFFSET;
4192 } else {
4193 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4194 reg_offset = SDMA1_REGISTER_OFFSET;
4195 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
4196 }
4197
4198 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
4199 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
4200
4201 /* Set ring buffer size in dwords */
4202 rb_bufsz = drm_order(ring->ring_size / 4);
4203 rb_cntl = rb_bufsz << 1;
4204#ifdef __BIG_ENDIAN
4205 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
4206#endif
4207 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4208
4209 /* Initialize the ring buffer's read and write pointers */
4210 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
4211 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
4212
4213 /* set the wb address whether it's enabled or not */
4214 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
4215 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
4216 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
4217 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
4218
4219 if (rdev->wb.enabled)
4220 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
4221
4222 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
4223 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
4224
4225 ring->wptr = 0;
4226 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
4227
4228 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
4229
4230 /* enable DMA RB */
4231 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
4232
4233 ib_cntl = SDMA_IB_ENABLE;
4234#ifdef __BIG_ENDIAN
4235 ib_cntl |= SDMA_IB_SWAP_ENABLE;
4236#endif
4237 /* enable DMA IBs */
4238 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
4239
4240 ring->ready = true;
4241
4242 r = radeon_ring_test(rdev, ring->idx, ring);
4243 if (r) {
4244 ring->ready = false;
4245 return r;
4246 }
4247 }
4248
4249 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4250
4251 return 0;
4252}
4253
4254/**
4255 * cik_sdma_rlc_resume - setup and start the async dma engines
4256 *
4257 * @rdev: radeon_device pointer
4258 *
4259 * Set up the compute DMA queues and enable them (CIK).
4260 * Returns 0 for success, error for failure.
4261 */
4262static int cik_sdma_rlc_resume(struct radeon_device *rdev)
4263{
4264 /* XXX todo */
4265 return 0;
4266}
4267
4268/**
4269 * cik_sdma_load_microcode - load the sDMA ME ucode
4270 *
4271 * @rdev: radeon_device pointer
4272 *
4273 * Loads the sDMA0/1 ucode.
4274 * Returns 0 for success, -EINVAL if the ucode is not available.
4275 */
4276static int cik_sdma_load_microcode(struct radeon_device *rdev)
4277{
4278 const __be32 *fw_data;
4279 int i;
4280
4281 if (!rdev->sdma_fw)
4282 return -EINVAL;
4283
4284 /* stop the gfx rings and rlc compute queues */
4285 cik_sdma_gfx_stop(rdev);
4286 cik_sdma_rlc_stop(rdev);
4287
4288 /* halt the MEs */
4289 cik_sdma_enable(rdev, false);
4290
4291 /* sdma0 */
4292 fw_data = (const __be32 *)rdev->sdma_fw->data;
4293 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4294 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4295 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4296 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4297
4298 /* sdma1 */
4299 fw_data = (const __be32 *)rdev->sdma_fw->data;
4300 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4301 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4302 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4303 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4304
4305 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4306 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4307 return 0;
4308}
4309
4310/**
4311 * cik_sdma_resume - setup and start the async dma engines
4312 *
4313 * @rdev: radeon_device pointer
4314 *
4315 * Set up the DMA engines and enable them (CIK).
4316 * Returns 0 for success, error for failure.
4317 */
4318static int cik_sdma_resume(struct radeon_device *rdev)
4319{
4320 int r;
4321
4322 /* Reset dma */
4323 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
4324 RREG32(SRBM_SOFT_RESET);
4325 udelay(50);
4326 WREG32(SRBM_SOFT_RESET, 0);
4327 RREG32(SRBM_SOFT_RESET);
4328
4329 r = cik_sdma_load_microcode(rdev);
4330 if (r)
4331 return r;
4332
4333 /* unhalt the MEs */
4334 cik_sdma_enable(rdev, true);
4335
4336 /* start the gfx rings and rlc compute queues */
4337 r = cik_sdma_gfx_resume(rdev);
4338 if (r)
4339 return r;
4340 r = cik_sdma_rlc_resume(rdev);
4341 if (r)
4342 return r;
4343
4344 return 0;
4345}
4346
4347/**
4348 * cik_sdma_fini - tear down the async dma engines
4349 *
4350 * @rdev: radeon_device pointer
4351 *
4352 * Stop the async dma engines and free the rings (CIK).
4353 */
4354static void cik_sdma_fini(struct radeon_device *rdev)
4355{
4356 /* stop the gfx rings and rlc compute queues */
4357 cik_sdma_gfx_stop(rdev);
4358 cik_sdma_rlc_stop(rdev);
4359 /* halt the MEs */
4360 cik_sdma_enable(rdev, false);
4361 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
4362 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
4363 /* XXX - compute dma queue tear down */
4364}
4365
4366/**
4367 * cik_copy_dma - copy pages using the DMA engine
4368 *
4369 * @rdev: radeon_device pointer
4370 * @src_offset: src GPU address
4371 * @dst_offset: dst GPU address
4372 * @num_gpu_pages: number of GPU pages to xfer
4373 * @fence: radeon fence object
4374 *
4375 * Copy GPU paging using the DMA engine (CIK).
4376 * Used by the radeon ttm implementation to move pages if
4377 * registered as the asic copy callback.
4378 */
4379int cik_copy_dma(struct radeon_device *rdev,
4380 uint64_t src_offset, uint64_t dst_offset,
4381 unsigned num_gpu_pages,
4382 struct radeon_fence **fence)
4383{
4384 struct radeon_semaphore *sem = NULL;
4385 int ring_index = rdev->asic->copy.dma_ring_index;
4386 struct radeon_ring *ring = &rdev->ring[ring_index];
4387 u32 size_in_bytes, cur_size_in_bytes;
4388 int i, num_loops;
4389 int r = 0;
4390
4391 r = radeon_semaphore_create(rdev, &sem);
4392 if (r) {
4393 DRM_ERROR("radeon: moving bo (%d).\n", r);
4394 return r;
4395 }
4396
4397 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4398 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4399 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
4400 if (r) {
4401 DRM_ERROR("radeon: moving bo (%d).\n", r);
4402 radeon_semaphore_free(rdev, &sem, NULL);
4403 return r;
4404 }
4405
4406 if (radeon_fence_need_sync(*fence, ring->idx)) {
4407 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
4408 ring->idx);
4409 radeon_fence_note_sync(*fence, ring->idx);
4410 } else {
4411 radeon_semaphore_free(rdev, &sem, NULL);
4412 }
4413
4414 for (i = 0; i < num_loops; i++) {
4415 cur_size_in_bytes = size_in_bytes;
4416 if (cur_size_in_bytes > 0x1fffff)
4417 cur_size_in_bytes = 0x1fffff;
4418 size_in_bytes -= cur_size_in_bytes;
4419 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
4420 radeon_ring_write(ring, cur_size_in_bytes);
4421 radeon_ring_write(ring, 0); /* src/dst endian swap */
4422 radeon_ring_write(ring, src_offset & 0xffffffff);
4423 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
4424 radeon_ring_write(ring, dst_offset & 0xfffffffc);
4425 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
4426 src_offset += cur_size_in_bytes;
4427 dst_offset += cur_size_in_bytes;
4428 }
4429
4430 r = radeon_fence_emit(rdev, fence, ring->idx);
4431 if (r) {
4432 radeon_ring_unlock_undo(rdev, ring);
4433 return r;
4434 }
4435
4436 radeon_ring_unlock_commit(rdev, ring);
4437 radeon_semaphore_free(rdev, &sem, *fence);
4438
4439 return r;
4440}
4441
4442/**
4443 * cik_sdma_ring_test - simple async dma engine test
4444 *
4445 * @rdev: radeon_device pointer
4446 * @ring: radeon_ring structure holding ring information
4447 *
4448 * Test the DMA engine by using it to write a
4449 * value to memory (CIK).
4450 * Returns 0 for success, error for failure.
4451 */
4452int cik_sdma_ring_test(struct radeon_device *rdev,
4453 struct radeon_ring *ring)
4454{
4455 unsigned i;
4456 int r;
4457 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4458 u32 tmp;
4459
4460 if (!ptr) {
4461 DRM_ERROR("invalid vram scratch pointer\n");
4462 return -EINVAL;
4463 }
4464
4465 tmp = 0xCAFEDEAD;
4466 writel(tmp, ptr);
4467
4468 r = radeon_ring_lock(rdev, ring, 4);
4469 if (r) {
4470 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
4471 return r;
4472 }
4473 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
4474 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
4475 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
4476 radeon_ring_write(ring, 1); /* number of DWs to follow */
4477 radeon_ring_write(ring, 0xDEADBEEF);
4478 radeon_ring_unlock_commit(rdev, ring);
4479
4480 for (i = 0; i < rdev->usec_timeout; i++) {
4481 tmp = readl(ptr);
4482 if (tmp == 0xDEADBEEF)
4483 break;
4484 DRM_UDELAY(1);
4485 }
4486
4487 if (i < rdev->usec_timeout) {
4488 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
4489 } else {
4490 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
4491 ring->idx, tmp);
4492 r = -EINVAL;
4493 }
4494 return r;
4495}
4496
4497/**
4498 * cik_sdma_ib_test - test an IB on the DMA engine
4499 *
4500 * @rdev: radeon_device pointer
4501 * @ring: radeon_ring structure holding ring information
4502 *
4503 * Test a simple IB in the DMA ring (CIK).
4504 * Returns 0 on success, error on failure.
4505 */
4506int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4507{
4508 struct radeon_ib ib;
4509 unsigned i;
4510 int r;
4511 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4512 u32 tmp = 0;
4513
4514 if (!ptr) {
4515 DRM_ERROR("invalid vram scratch pointer\n");
4516 return -EINVAL;
4517 }
4518
4519 tmp = 0xCAFEDEAD;
4520 writel(tmp, ptr);
4521
4522 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4523 if (r) {
4524 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4525 return r;
4526 }
4527
4528 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4529 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
4530 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
4531 ib.ptr[3] = 1;
4532 ib.ptr[4] = 0xDEADBEEF;
4533 ib.length_dw = 5;
4534
4535 r = radeon_ib_schedule(rdev, &ib, NULL);
4536 if (r) {
4537 radeon_ib_free(rdev, &ib);
4538 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4539 return r;
4540 }
4541 r = radeon_fence_wait(ib.fence, false);
4542 if (r) {
4543 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4544 return r;
4545 }
4546 for (i = 0; i < rdev->usec_timeout; i++) {
4547 tmp = readl(ptr);
4548 if (tmp == 0xDEADBEEF)
4549 break;
4550 DRM_UDELAY(1);
4551 }
4552 if (i < rdev->usec_timeout) {
4553 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4554 } else {
4555 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
4556 r = -EINVAL;
4557 }
4558 radeon_ib_free(rdev, &ib);
4559 return r;
4560}
4561
4562
4563 3998  static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4564 3999  {
4565 4000  	dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
@@ -4609,7 +4044,7 @@ static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4609 4044   * mask to be used by cik_gpu_soft_reset().
4610 4045   * Returns a mask of the blocks to be reset.
4611 4046   */
4612 -static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4047 +u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4613 4048  {
4614 4049  	u32 reset_mask = 0;
4615 4050  	u32 tmp;
@@ -4860,34 +4295,6 @@ bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4860 4295  	return radeon_ring_test_lockup(rdev, ring);
4861 4296  }
4862 4297
4863/**
4864 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4865 *
4866 * @rdev: radeon_device pointer
4867 * @ring: radeon_ring structure holding ring information
4868 *
4869 * Check if the async DMA engine is locked up (CIK).
4870 * Returns true if the engine appears to be locked up, false if not.
4871 */
4872bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4873{
4874 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4875 u32 mask;
4876
4877 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4878 mask = RADEON_RESET_DMA;
4879 else
4880 mask = RADEON_RESET_DMA1;
4881
4882 if (!(reset_mask & mask)) {
4883 radeon_ring_lockup_update(ring);
4884 return false;
4885 }
4886 /* force ring activities */
4887 radeon_ring_force_activity(rdev, ring);
4888 return radeon_ring_test_lockup(rdev, ring);
4889}
4890
4891 4298  /* MC */
4892 4299  /**
4893 4300   * cik_mc_program - program the GPU memory controller
@@ -5424,131 +4831,8 @@ void cik_vm_set_page(struct radeon_device *rdev,
5424 4831  	}
5425 4832  	} else {
5426 4833  		/* DMA */
5427 -		if (flags & RADEON_VM_PAGE_SYSTEM) {
4834 +		cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
5428 while (count) {
5429 ndw = count * 2;
5430 if (ndw > 0xFFFFE)
5431 ndw = 0xFFFFE;
5432
5433 /* for non-physically contiguous pages (system) */
5434 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
5435 ib->ptr[ib->length_dw++] = pe;
5436 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5437 ib->ptr[ib->length_dw++] = ndw;
5438 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
5439 if (flags & RADEON_VM_PAGE_SYSTEM) {
5440 value = radeon_vm_map_gart(rdev, addr);
5441 value &= 0xFFFFFFFFFFFFF000ULL;
5442 } else if (flags & RADEON_VM_PAGE_VALID) {
5443 value = addr;
5444 } else {
5445 value = 0;
5446 }
5447 addr += incr;
5448 value |= r600_flags;
5449 ib->ptr[ib->length_dw++] = value;
5450 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5451 }
5452 }
5453 } else {
5454 while (count) {
5455 ndw = count;
5456 if (ndw > 0x7FFFF)
5457 ndw = 0x7FFFF;
5458
5459 if (flags & RADEON_VM_PAGE_VALID)
5460 value = addr;
5461 else
5462 value = 0;
5463 /* for physically contiguous pages (vram) */
5464 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
5465 ib->ptr[ib->length_dw++] = pe; /* dst addr */
5466 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5467 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
5468 ib->ptr[ib->length_dw++] = 0;
5469 ib->ptr[ib->length_dw++] = value; /* value */
5470 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5471 ib->ptr[ib->length_dw++] = incr; /* increment size */
5472 ib->ptr[ib->length_dw++] = 0;
5473 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
5474 pe += ndw * 8;
5475 addr += ndw * incr;
5476 count -= ndw;
5477 }
5478 }
5479 while (ib->length_dw & 0x7)
5480 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
5481 }
5482}
5483
5484/**
5485 * cik_dma_vm_flush - cik vm flush using sDMA
5486 *
5487 * @rdev: radeon_device pointer
5488 *
5489 * Update the page table base and flush the VM TLB
5490 * using sDMA (CIK).
5491 */
5492void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5493{
5494 struct radeon_ring *ring = &rdev->ring[ridx];
5495 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
5496 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
5497 u32 ref_and_mask;
5498
5499 if (vm == NULL)
5500 return;
5501
5502 if (ridx == R600_RING_TYPE_DMA_INDEX)
5503 ref_and_mask = SDMA0;
5504 else
5505 ref_and_mask = SDMA1;
5506
5507 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5508 if (vm->id < 8) {
5509 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5510 } else {
5511 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5512 4835  	}
5513 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5514
5515 /* update SH_MEM_* regs */
5516 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5517 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5518 radeon_ring_write(ring, VMID(vm->id));
5519
5520 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5521 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5522 radeon_ring_write(ring, 0);
5523
5524 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5525 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
5526 radeon_ring_write(ring, 0);
5527
5528 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5529 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
5530 radeon_ring_write(ring, 1);
5531
5532 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5533 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
5534 radeon_ring_write(ring, 0);
5535
5536 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5537 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5538 radeon_ring_write(ring, VMID(0));
5539
5540 /* flush HDP */
5541 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
5542 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
5543 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
5544 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
5545 radeon_ring_write(ring, ref_and_mask); /* MASK */
5546 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
5547
5548 /* flush TLB */
5549 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5550 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5551 radeon_ring_write(ring, 1 << vm->id);
5552 4836  }
5553 4837
5554 4838  /*
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
new file mode 100644
index 000000000000..8925185a0049
--- /dev/null
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -0,0 +1,785 @@
1/*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
25#include <drm/drmP.h>
26#include "radeon.h"
27#include "radeon_asic.h"
28#include "cikd.h"
29
30/* sdma */
31#define CIK_SDMA_UCODE_SIZE 1050
32#define CIK_SDMA_UCODE_VERSION 64
33
34u32 cik_gpu_check_soft_reset(struct radeon_device *rdev);
35
36/*
37 * sDMA - System DMA
38 * Starting with CIK, the GPU has new asynchronous
39 * DMA engines. These engines are used for compute
40 * and gfx. There are two DMA engines (SDMA0, SDMA1)
41 * and each one supports 1 ring buffer used for gfx
42 * and 2 queues used for compute.
43 *
44 * The programming model is very similar to the CP
45 * (ring buffer, IBs, etc.), but sDMA has its own
46 * packet format that is different from the PM4 format
47 * used by the CP. sDMA supports copying data, writing
48 * embedded data, solid fills, and a number of other
49 * things. It also has support for tiling/detiling of
50 * buffers.
51 */
52
53/**
54 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
55 *
56 * @rdev: radeon_device pointer
57 * @ib: IB object to schedule
58 *
59 * Schedule an IB in the DMA ring (CIK).
60 */
61void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
62 struct radeon_ib *ib)
63{
64 struct radeon_ring *ring = &rdev->ring[ib->ring];
65 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
66
67 if (rdev->wb.enabled) {
68 u32 next_rptr = ring->wptr + 5;
69 while ((next_rptr & 7) != 4)
70 next_rptr++;
71 next_rptr += 4;
72 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
73 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
74 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
75 radeon_ring_write(ring, 1); /* number of DWs to follow */
76 radeon_ring_write(ring, next_rptr);
77 }
78
79	/* IB packet must end on an 8 DW boundary */
80 while ((ring->wptr & 7) != 4)
81 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
82 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
83 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
84 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
85 radeon_ring_write(ring, ib->length_dw);
86
87}
88
89/**
90 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
91 *
92 * @rdev: radeon_device pointer
93 * @fence: radeon fence object
94 *
95 * Add a DMA fence packet to the ring to write
96 * the fence seq number and DMA trap packet to generate
97 * an interrupt if needed (CIK).
98 */
99void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
100 struct radeon_fence *fence)
101{
102 struct radeon_ring *ring = &rdev->ring[fence->ring];
103 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
104 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
105 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
106 u32 ref_and_mask;
107
108 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
109 ref_and_mask = SDMA0;
110 else
111 ref_and_mask = SDMA1;
112
113 /* write the fence */
114 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
115 radeon_ring_write(ring, addr & 0xffffffff);
116 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
117 radeon_ring_write(ring, fence->seq);
118 /* generate an interrupt */
119 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
120 /* flush HDP */
121 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
122 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
123 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
124 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
125 radeon_ring_write(ring, ref_and_mask); /* MASK */
126 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
127}
128
129/**
130 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
131 *
132 * @rdev: radeon_device pointer
133 * @ring: radeon_ring structure holding ring information
134 * @semaphore: radeon semaphore object
135 * @emit_wait: wait or signal semaphore
136 *
137 * Add a DMA semaphore packet to the ring to wait on or signal
138 * other rings (CIK).
139 */
140void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
141 struct radeon_ring *ring,
142 struct radeon_semaphore *semaphore,
143 bool emit_wait)
144{
145 u64 addr = semaphore->gpu_addr;
146 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
147
148 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
149 radeon_ring_write(ring, addr & 0xfffffff8);
150 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
151}
152
153/**
154 * cik_sdma_gfx_stop - stop the gfx async dma engines
155 *
156 * @rdev: radeon_device pointer
157 *
158 * Stop the gfx async dma ring buffers (CIK).
159 */
160static void cik_sdma_gfx_stop(struct radeon_device *rdev)
161{
162 u32 rb_cntl, reg_offset;
163 int i;
164
165 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
166
167 for (i = 0; i < 2; i++) {
168 if (i == 0)
169 reg_offset = SDMA0_REGISTER_OFFSET;
170 else
171 reg_offset = SDMA1_REGISTER_OFFSET;
172 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
173 rb_cntl &= ~SDMA_RB_ENABLE;
174 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
175 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
176 }
177}
178
179/**
180 * cik_sdma_rlc_stop - stop the compute async dma engines
181 *
182 * @rdev: radeon_device pointer
183 *
184 * Stop the compute async dma queues (CIK).
185 */
186static void cik_sdma_rlc_stop(struct radeon_device *rdev)
187{
188 /* XXX todo */
189}
190
191/**
192 * cik_sdma_enable - stop the async dma engines
193 *
194 * @rdev: radeon_device pointer
195 * @enable: enable/disable the DMA MEs.
196 *
197 * Halt or unhalt the async dma engines (CIK).
198 */
199void cik_sdma_enable(struct radeon_device *rdev, bool enable)
200{
201 u32 me_cntl, reg_offset;
202 int i;
203
204 for (i = 0; i < 2; i++) {
205 if (i == 0)
206 reg_offset = SDMA0_REGISTER_OFFSET;
207 else
208 reg_offset = SDMA1_REGISTER_OFFSET;
209 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
210 if (enable)
211 me_cntl &= ~SDMA_HALT;
212 else
213 me_cntl |= SDMA_HALT;
214 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
215 }
216}
217
218/**
219 * cik_sdma_gfx_resume - setup and start the async dma engines
220 *
221 * @rdev: radeon_device pointer
222 *
223 * Set up the gfx DMA ring buffers and enable them (CIK).
224 * Returns 0 for success, error for failure.
225 */
226static int cik_sdma_gfx_resume(struct radeon_device *rdev)
227{
228 struct radeon_ring *ring;
229 u32 rb_cntl, ib_cntl;
230 u32 rb_bufsz;
231 u32 reg_offset, wb_offset;
232 int i, r;
233
234 for (i = 0; i < 2; i++) {
235 if (i == 0) {
236 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
237 reg_offset = SDMA0_REGISTER_OFFSET;
238 wb_offset = R600_WB_DMA_RPTR_OFFSET;
239 } else {
240 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
241 reg_offset = SDMA1_REGISTER_OFFSET;
242 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
243 }
244
245 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
246 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
247
248 /* Set ring buffer size in dwords */
249 rb_bufsz = drm_order(ring->ring_size / 4);
250 rb_cntl = rb_bufsz << 1;
251#ifdef __BIG_ENDIAN
252 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
253#endif
254 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
255
256 /* Initialize the ring buffer's read and write pointers */
257 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
258 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
259
260 /* set the wb address whether it's enabled or not */
261 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
262 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
263 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
264 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
265
266 if (rdev->wb.enabled)
267 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
268
269 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
270 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
271
272 ring->wptr = 0;
273 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
274
275 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
276
277 /* enable DMA RB */
278 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
279
280 ib_cntl = SDMA_IB_ENABLE;
281#ifdef __BIG_ENDIAN
282 ib_cntl |= SDMA_IB_SWAP_ENABLE;
283#endif
284 /* enable DMA IBs */
285 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
286
287 ring->ready = true;
288
289 r = radeon_ring_test(rdev, ring->idx, ring);
290 if (r) {
291 ring->ready = false;
292 return r;
293 }
294 }
295
296 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
297
298 return 0;
299}
300
301/**
302 * cik_sdma_rlc_resume - setup and start the async dma engines
303 *
304 * @rdev: radeon_device pointer
305 *
306 * Set up the compute DMA queues and enable them (CIK).
307 * Returns 0 for success, error for failure.
308 */
309static int cik_sdma_rlc_resume(struct radeon_device *rdev)
310{
311 /* XXX todo */
312 return 0;
313}
314
315/**
316 * cik_sdma_load_microcode - load the sDMA ME ucode
317 *
318 * @rdev: radeon_device pointer
319 *
320 * Loads the sDMA0/1 ucode.
321 * Returns 0 for success, -EINVAL if the ucode is not available.
322 */
323static int cik_sdma_load_microcode(struct radeon_device *rdev)
324{
325 const __be32 *fw_data;
326 int i;
327
328 if (!rdev->sdma_fw)
329 return -EINVAL;
330
331 /* stop the gfx rings and rlc compute queues */
332 cik_sdma_gfx_stop(rdev);
333 cik_sdma_rlc_stop(rdev);
334
335 /* halt the MEs */
336 cik_sdma_enable(rdev, false);
337
338 /* sdma0 */
339 fw_data = (const __be32 *)rdev->sdma_fw->data;
340 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
341 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
342 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
343 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
344
345 /* sdma1 */
346 fw_data = (const __be32 *)rdev->sdma_fw->data;
347 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
348 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
349 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
350 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
351
352 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
353 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
354 return 0;
355}
356
357/**
358 * cik_sdma_resume - setup and start the async dma engines
359 *
360 * @rdev: radeon_device pointer
361 *
362 * Set up the DMA engines and enable them (CIK).
363 * Returns 0 for success, error for failure.
364 */
365int cik_sdma_resume(struct radeon_device *rdev)
366{
367 int r;
368
369 /* Reset dma */
370 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
371 RREG32(SRBM_SOFT_RESET);
372 udelay(50);
373 WREG32(SRBM_SOFT_RESET, 0);
374 RREG32(SRBM_SOFT_RESET);
375
376 r = cik_sdma_load_microcode(rdev);
377 if (r)
378 return r;
379
380 /* unhalt the MEs */
381 cik_sdma_enable(rdev, true);
382
383 /* start the gfx rings and rlc compute queues */
384 r = cik_sdma_gfx_resume(rdev);
385 if (r)
386 return r;
387 r = cik_sdma_rlc_resume(rdev);
388 if (r)
389 return r;
390
391 return 0;
392}
393
394/**
395 * cik_sdma_fini - tear down the async dma engines
396 *
397 * @rdev: radeon_device pointer
398 *
399 * Stop the async dma engines and free the rings (CIK).
400 */
401void cik_sdma_fini(struct radeon_device *rdev)
402{
403 /* stop the gfx rings and rlc compute queues */
404 cik_sdma_gfx_stop(rdev);
405 cik_sdma_rlc_stop(rdev);
406 /* halt the MEs */
407 cik_sdma_enable(rdev, false);
408 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
409 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
410 /* XXX - compute dma queue tear down */
411}
412
413/**
414 * cik_copy_dma - copy pages using the DMA engine
415 *
416 * @rdev: radeon_device pointer
417 * @src_offset: src GPU address
418 * @dst_offset: dst GPU address
419 * @num_gpu_pages: number of GPU pages to xfer
420 * @fence: radeon fence object
421 *
422 * Copy GPU paging using the DMA engine (CIK).
423 * Used by the radeon ttm implementation to move pages if
424 * registered as the asic copy callback.
425 */
426int cik_copy_dma(struct radeon_device *rdev,
427 uint64_t src_offset, uint64_t dst_offset,
428 unsigned num_gpu_pages,
429 struct radeon_fence **fence)
430{
431 struct radeon_semaphore *sem = NULL;
432 int ring_index = rdev->asic->copy.dma_ring_index;
433 struct radeon_ring *ring = &rdev->ring[ring_index];
434 u32 size_in_bytes, cur_size_in_bytes;
435 int i, num_loops;
436 int r = 0;
437
438 r = radeon_semaphore_create(rdev, &sem);
439 if (r) {
440 DRM_ERROR("radeon: moving bo (%d).\n", r);
441 return r;
442 }
443
444 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
445 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
446 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
447 if (r) {
448 DRM_ERROR("radeon: moving bo (%d).\n", r);
449 radeon_semaphore_free(rdev, &sem, NULL);
450 return r;
451 }
452
453 if (radeon_fence_need_sync(*fence, ring->idx)) {
454 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
455 ring->idx);
456 radeon_fence_note_sync(*fence, ring->idx);
457 } else {
458 radeon_semaphore_free(rdev, &sem, NULL);
459 }
460
461 for (i = 0; i < num_loops; i++) {
462 cur_size_in_bytes = size_in_bytes;
463 if (cur_size_in_bytes > 0x1fffff)
464 cur_size_in_bytes = 0x1fffff;
465 size_in_bytes -= cur_size_in_bytes;
466 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
467 radeon_ring_write(ring, cur_size_in_bytes);
468 radeon_ring_write(ring, 0); /* src/dst endian swap */
469 radeon_ring_write(ring, src_offset & 0xffffffff);
470 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
471 radeon_ring_write(ring, dst_offset & 0xfffffffc);
472 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
473 src_offset += cur_size_in_bytes;
474 dst_offset += cur_size_in_bytes;
475 }
476
477 r = radeon_fence_emit(rdev, fence, ring->idx);
478 if (r) {
479 radeon_ring_unlock_undo(rdev, ring);
480 return r;
481 }
482
483 radeon_ring_unlock_commit(rdev, ring);
484 radeon_semaphore_free(rdev, &sem, *fence);
485
486 return r;
487}
488
489/**
490 * cik_sdma_ring_test - simple async dma engine test
491 *
492 * @rdev: radeon_device pointer
493 * @ring: radeon_ring structure holding ring information
494 *
495 * Test the DMA engine by using it to write a
496 * value to memory (CIK).
497 * Returns 0 for success, error for failure.
498 */
499int cik_sdma_ring_test(struct radeon_device *rdev,
500 struct radeon_ring *ring)
501{
502 unsigned i;
503 int r;
504 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
505 u32 tmp;
506
507 if (!ptr) {
508 DRM_ERROR("invalid vram scratch pointer\n");
509 return -EINVAL;
510 }
511
512 tmp = 0xCAFEDEAD;
513 writel(tmp, ptr);
514
515 r = radeon_ring_lock(rdev, ring, 4);
516 if (r) {
517 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
518 return r;
519 }
520 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
521 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
522 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
523 radeon_ring_write(ring, 1); /* number of DWs to follow */
524 radeon_ring_write(ring, 0xDEADBEEF);
525 radeon_ring_unlock_commit(rdev, ring);
526
527 for (i = 0; i < rdev->usec_timeout; i++) {
528 tmp = readl(ptr);
529 if (tmp == 0xDEADBEEF)
530 break;
531 DRM_UDELAY(1);
532 }
533
534 if (i < rdev->usec_timeout) {
535 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
536 } else {
537 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
538 ring->idx, tmp);
539 r = -EINVAL;
540 }
541 return r;
542}
543
544/**
545 * cik_sdma_ib_test - test an IB on the DMA engine
546 *
547 * @rdev: radeon_device pointer
548 * @ring: radeon_ring structure holding ring information
549 *
550 * Test a simple IB in the DMA ring (CIK).
551 * Returns 0 on success, error on failure.
552 */
553int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
554{
555 struct radeon_ib ib;
556 unsigned i;
557 int r;
558 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
559 u32 tmp = 0;
560
561 if (!ptr) {
562 DRM_ERROR("invalid vram scratch pointer\n");
563 return -EINVAL;
564 }
565
566 tmp = 0xCAFEDEAD;
567 writel(tmp, ptr);
568
569 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
570 if (r) {
571 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
572 return r;
573 }
574
575 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
576 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
577 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
578 ib.ptr[3] = 1;
579 ib.ptr[4] = 0xDEADBEEF;
580 ib.length_dw = 5;
581
582 r = radeon_ib_schedule(rdev, &ib, NULL);
583 if (r) {
584 radeon_ib_free(rdev, &ib);
585 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
586 return r;
587 }
588 r = radeon_fence_wait(ib.fence, false);
589 if (r) {
590 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
591 return r;
592 }
593 for (i = 0; i < rdev->usec_timeout; i++) {
594 tmp = readl(ptr);
595 if (tmp == 0xDEADBEEF)
596 break;
597 DRM_UDELAY(1);
598 }
599 if (i < rdev->usec_timeout) {
600 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
601 } else {
602 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
603 r = -EINVAL;
604 }
605 radeon_ib_free(rdev, &ib);
606 return r;
607}
608
609/**
610 * cik_sdma_is_lockup - Check if the DMA engine is locked up
611 *
612 * @rdev: radeon_device pointer
613 * @ring: radeon_ring structure holding ring information
614 *
615 * Check if the async DMA engine is locked up (CIK).
616 * Returns true if the engine appears to be locked up, false if not.
617 */
618bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
619{
620 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
621 u32 mask;
622
623 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
624 mask = RADEON_RESET_DMA;
625 else
626 mask = RADEON_RESET_DMA1;
627
628 if (!(reset_mask & mask)) {
629 radeon_ring_lockup_update(ring);
630 return false;
631 }
632 /* force ring activities */
633 radeon_ring_force_activity(rdev, ring);
634 return radeon_ring_test_lockup(rdev, ring);
635}
636
637/**
638 * cik_sdma_vm_set_page - update the page tables using sDMA
639 *
640 * @rdev: radeon_device pointer
641 * @ib: indirect buffer to fill with commands
642 * @pe: addr of the page entry
643 * @addr: dst addr to write into pe
644 * @count: number of page entries to update
645 * @incr: increase next addr by incr bytes
646 * @flags: access flags
647 *
648 * Update the page tables using sDMA (CIK).
649 */
650void cik_sdma_vm_set_page(struct radeon_device *rdev,
651 struct radeon_ib *ib,
652 uint64_t pe,
653 uint64_t addr, unsigned count,
654 uint32_t incr, uint32_t flags)
655{
656 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
657 uint64_t value;
658 unsigned ndw;
659
660 if (flags & RADEON_VM_PAGE_SYSTEM) {
661 while (count) {
662 ndw = count * 2;
663 if (ndw > 0xFFFFE)
664 ndw = 0xFFFFE;
665
666 /* for non-physically contiguous pages (system) */
667 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
668 ib->ptr[ib->length_dw++] = pe;
669 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
670 ib->ptr[ib->length_dw++] = ndw;
671 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
672 if (flags & RADEON_VM_PAGE_SYSTEM) {
673 value = radeon_vm_map_gart(rdev, addr);
674 value &= 0xFFFFFFFFFFFFF000ULL;
675 } else if (flags & RADEON_VM_PAGE_VALID) {
676 value = addr;
677 } else {
678 value = 0;
679 }
680 addr += incr;
681 value |= r600_flags;
682 ib->ptr[ib->length_dw++] = value;
683 ib->ptr[ib->length_dw++] = upper_32_bits(value);
684 }
685 }
686 } else {
687 while (count) {
688 ndw = count;
689 if (ndw > 0x7FFFF)
690 ndw = 0x7FFFF;
691
692 if (flags & RADEON_VM_PAGE_VALID)
693 value = addr;
694 else
695 value = 0;
696 /* for physically contiguous pages (vram) */
697 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
698 ib->ptr[ib->length_dw++] = pe; /* dst addr */
699 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
700 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
701 ib->ptr[ib->length_dw++] = 0;
702 ib->ptr[ib->length_dw++] = value; /* value */
703 ib->ptr[ib->length_dw++] = upper_32_bits(value);
704 ib->ptr[ib->length_dw++] = incr; /* increment size */
705 ib->ptr[ib->length_dw++] = 0;
706 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
707 pe += ndw * 8;
708 addr += ndw * incr;
709 count -= ndw;
710 }
711 }
712 while (ib->length_dw & 0x7)
713 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
714}
715
716/**
717 * cik_dma_vm_flush - cik vm flush using sDMA
718 *
719 * @rdev: radeon_device pointer
720 *
721 * Update the page table base and flush the VM TLB
722 * using sDMA (CIK).
723 */
724void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
725{
726 struct radeon_ring *ring = &rdev->ring[ridx];
727 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
728 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
729 u32 ref_and_mask;
730
731 if (vm == NULL)
732 return;
733
734 if (ridx == R600_RING_TYPE_DMA_INDEX)
735 ref_and_mask = SDMA0;
736 else
737 ref_and_mask = SDMA1;
738
739 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
740 if (vm->id < 8) {
741 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
742 } else {
743 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
744 }
745 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
746
747 /* update SH_MEM_* regs */
748 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
749 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
750 radeon_ring_write(ring, VMID(vm->id));
751
752 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
753 radeon_ring_write(ring, SH_MEM_BASES >> 2);
754 radeon_ring_write(ring, 0);
755
756 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
757 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
758 radeon_ring_write(ring, 0);
759
760 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
761 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
762 radeon_ring_write(ring, 1);
763
764 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
765 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
766 radeon_ring_write(ring, 0);
767
768 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
769 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
770 radeon_ring_write(ring, VMID(0));
771
772 /* flush HDP */
773 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
774 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
775 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
776 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
777 radeon_ring_write(ring, ref_and_mask); /* MASK */
778 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
779
780 /* flush TLB */
781 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
782 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
783 radeon_ring_write(ring, 1 << vm->id);
784}
785
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 52ed22333f0d..bbaa4f2056ce 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3613,7 +3613,7 @@ bool evergreen_is_display_hung(struct radeon_device *rdev)
3613 3613  	return true;
3614 3614  }
3615 3615
3616 -static u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev)
3616 +u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev)
3617 3617  {
3618 3618  	u32 reset_mask = 0;
3619 3619  	u32 tmp;
@@ -3839,28 +3839,6 @@ bool evergreen_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *rin
3839 3839  	return radeon_ring_test_lockup(rdev, ring);
3840 3840  }
3841 3841
3842/**
3843 * evergreen_dma_is_lockup - Check if the DMA engine is locked up
3844 *
3845 * @rdev: radeon_device pointer
3846 * @ring: radeon_ring structure holding ring information
3847 *
3848 * Check if the async DMA engine is locked up.
3849 * Returns true if the engine appears to be locked up, false if not.
3850 */
3851bool evergreen_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3852{
3853 u32 reset_mask = evergreen_gpu_check_soft_reset(rdev);
3854
3855 if (!(reset_mask & RADEON_RESET_DMA)) {
3856 radeon_ring_lockup_update(ring);
3857 return false;
3858 }
3859 /* force ring activities */
3860 radeon_ring_force_activity(rdev, ring);
3861 return radeon_ring_test_lockup(rdev, ring);
3862}
3863
3864 3842  /*
3865 3843   * RLC
3866 3844   */
@@ -5024,143 +5002,6 @@ restart_ih:
5024 5002  	return IRQ_HANDLED;
5025 5003  }
5026 5004
5027/**
5028 * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring
5029 *
5030 * @rdev: radeon_device pointer
5031 * @fence: radeon fence object
5032 *
5033 * Add a DMA fence packet to the ring to write
5034 * the fence seq number and DMA trap packet to generate
5035 * an interrupt if needed (evergreen-SI).
5036 */
5037void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
5038 struct radeon_fence *fence)
5039{
5040 struct radeon_ring *ring = &rdev->ring[fence->ring];
5041 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
5042 /* write the fence */
5043 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0));
5044 radeon_ring_write(ring, addr & 0xfffffffc);
5045 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
5046 radeon_ring_write(ring, fence->seq);
5047 /* generate an interrupt */
5048 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0));
5049 /* flush HDP */
5050 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0));
5051 radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
5052 radeon_ring_write(ring, 1);
5053}
5054
5055/**
5056 * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine
5057 *
5058 * @rdev: radeon_device pointer
5059 * @ib: IB object to schedule
5060 *
5061 * Schedule an IB in the DMA ring (evergreen).
5062 */
5063void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
5064 struct radeon_ib *ib)
5065{
5066 struct radeon_ring *ring = &rdev->ring[ib->ring];
5067
5068 if (rdev->wb.enabled) {
5069 u32 next_rptr = ring->wptr + 4;
5070 while ((next_rptr & 7) != 5)
5071 next_rptr++;
5072 next_rptr += 3;
5073 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 1));
5074 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5075 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
5076 radeon_ring_write(ring, next_rptr);
5077 }
5078
5079 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
5080 * Pad as necessary with NOPs.
5081 */
5082 while ((ring->wptr & 7) != 5)
5083 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
5084 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0));
5085 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
5086 radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
5087
5088}
5089
5090/**
5091 * evergreen_copy_dma - copy pages using the DMA engine
5092 *
5093 * @rdev: radeon_device pointer
5094 * @src_offset: src GPU address
5095 * @dst_offset: dst GPU address
5096 * @num_gpu_pages: number of GPU pages to xfer
5097 * @fence: radeon fence object
5098 *
5099 * Copy GPU pages using the DMA engine (evergreen-cayman).
5100 * Used by the radeon ttm implementation to move pages if
5101 * registered as the asic copy callback.
5102 */
5103int evergreen_copy_dma(struct radeon_device *rdev,
5104 uint64_t src_offset, uint64_t dst_offset,
5105 unsigned num_gpu_pages,
5106 struct radeon_fence **fence)
5107{
5108 struct radeon_semaphore *sem = NULL;
5109 int ring_index = rdev->asic->copy.dma_ring_index;
5110 struct radeon_ring *ring = &rdev->ring[ring_index];
5111 u32 size_in_dw, cur_size_in_dw;
5112 int i, num_loops;
5113 int r = 0;
5114
5115 r = radeon_semaphore_create(rdev, &sem);
5116 if (r) {
5117 DRM_ERROR("radeon: moving bo (%d).\n", r);
5118 return r;
5119 }
5120
5121 size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
5122 num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
5123 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
5124 if (r) {
5125 DRM_ERROR("radeon: moving bo (%d).\n", r);
5126 radeon_semaphore_free(rdev, &sem, NULL);
5127 return r;
5128 }
5129
5130 if (radeon_fence_need_sync(*fence, ring->idx)) {
5131 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
5132 ring->idx);
5133 radeon_fence_note_sync(*fence, ring->idx);
5134 } else {
5135 radeon_semaphore_free(rdev, &sem, NULL);
5136 }
5137
5138 for (i = 0; i < num_loops; i++) {
5139 cur_size_in_dw = size_in_dw;
5140 if (cur_size_in_dw > 0xFFFFF)
5141 cur_size_in_dw = 0xFFFFF;
5142 size_in_dw -= cur_size_in_dw;
5143 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, cur_size_in_dw));
5144 radeon_ring_write(ring, dst_offset & 0xfffffffc);
5145 radeon_ring_write(ring, src_offset & 0xfffffffc);
5146 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
5147 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
5148 src_offset += cur_size_in_dw * 4;
5149 dst_offset += cur_size_in_dw * 4;
5150 }
5151
5152 r = radeon_fence_emit(rdev, fence, ring->idx);
5153 if (r) {
5154 radeon_ring_unlock_undo(rdev, ring);
5155 return r;
5156 }
5157
5158 radeon_ring_unlock_commit(rdev, ring);
5159 radeon_semaphore_free(rdev, &sem, *fence);
5160
5161 return r;
5162}
5163
5164static int evergreen_startup(struct radeon_device *rdev) 5005static int evergreen_startup(struct radeon_device *rdev)
5165{ 5006{
5166 struct radeon_ring *ring; 5007 struct radeon_ring *ring;
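
As a side note on the padding loop in evergreen_dma_ring_ib_execute() above: the INDIRECT_BUFFER packet is 3 dwords long, so padding with NOPs until wptr % 8 == 5 makes the packet end exactly on an 8 dword boundary. A minimal stand-alone sketch of that arithmetic, with a hypothetical starting wptr (illustration only, not driver code):

#include <stdio.h>

int main(void)
{
        unsigned int wptr = 42;  /* hypothetical write pointer, in dwords */
        unsigned int nops = 0;

        /* same test as the driver: pad with NOPs until wptr % 8 == 5 */
        while ((wptr & 7) != 5) {
                wptr++;          /* each NOP consumes one dword */
                nops++;
        }
        /* the 3-dword INDIRECT_BUFFER packet now ends on an 8 dword boundary */
        printf("padded %u NOPs, IB packet ends at dword %u, %u %% 8 = %u\n",
               nops, wptr + 3, wptr + 3, (wptr + 3) % 8);
        return 0;
}
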
diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c
new file mode 100644
index 000000000000..6a0656d00ed0
--- /dev/null
+++ b/drivers/gpu/drm/radeon/evergreen_dma.c
@@ -0,0 +1,190 @@
1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <drm/drmP.h>
25#include "radeon.h"
26#include "radeon_asic.h"
27#include "evergreend.h"
28
29u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev);
30
31/**
32 * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring
33 *
34 * @rdev: radeon_device pointer
35 * @fence: radeon fence object
36 *
37 * Add a DMA fence packet to the ring to write
38 * the fence seq number and DMA trap packet to generate
39 * an interrupt if needed (evergreen-SI).
40 */
41void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
42 struct radeon_fence *fence)
43{
44 struct radeon_ring *ring = &rdev->ring[fence->ring];
45 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
46 /* write the fence */
47 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0));
48 radeon_ring_write(ring, addr & 0xfffffffc);
49 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
50 radeon_ring_write(ring, fence->seq);
51 /* generate an interrupt */
52 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0));
53 /* flush HDP */
54 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0));
55 radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
56 radeon_ring_write(ring, 1);
57}
58
59/**
60 * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine
61 *
62 * @rdev: radeon_device pointer
63 * @ib: IB object to schedule
64 *
65 * Schedule an IB in the DMA ring (evergreen).
66 */
67void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
68 struct radeon_ib *ib)
69{
70 struct radeon_ring *ring = &rdev->ring[ib->ring];
71
72 if (rdev->wb.enabled) {
73 u32 next_rptr = ring->wptr + 4;
74 while ((next_rptr & 7) != 5)
75 next_rptr++;
76 next_rptr += 3;
77 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 1));
78 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
79 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
80 radeon_ring_write(ring, next_rptr);
81 }
82
83 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
84 * Pad as necessary with NOPs.
85 */
86 while ((ring->wptr & 7) != 5)
87 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
88 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0));
89 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
90 radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
91
92}
93
94/**
95 * evergreen_copy_dma - copy pages using the DMA engine
96 *
97 * @rdev: radeon_device pointer
98 * @src_offset: src GPU address
99 * @dst_offset: dst GPU address
100 * @num_gpu_pages: number of GPU pages to xfer
101 * @fence: radeon fence object
102 *
103 * Copy GPU pages using the DMA engine (evergreen-cayman).
104 * Used by the radeon ttm implementation to move pages if
105 * registered as the asic copy callback.
106 */
107int evergreen_copy_dma(struct radeon_device *rdev,
108 uint64_t src_offset, uint64_t dst_offset,
109 unsigned num_gpu_pages,
110 struct radeon_fence **fence)
111{
112 struct radeon_semaphore *sem = NULL;
113 int ring_index = rdev->asic->copy.dma_ring_index;
114 struct radeon_ring *ring = &rdev->ring[ring_index];
115 u32 size_in_dw, cur_size_in_dw;
116 int i, num_loops;
117 int r = 0;
118
119 r = radeon_semaphore_create(rdev, &sem);
120 if (r) {
121 DRM_ERROR("radeon: moving bo (%d).\n", r);
122 return r;
123 }
124
125 size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
126 num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
127 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
128 if (r) {
129 DRM_ERROR("radeon: moving bo (%d).\n", r);
130 radeon_semaphore_free(rdev, &sem, NULL);
131 return r;
132 }
133
134 if (radeon_fence_need_sync(*fence, ring->idx)) {
135 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
136 ring->idx);
137 radeon_fence_note_sync(*fence, ring->idx);
138 } else {
139 radeon_semaphore_free(rdev, &sem, NULL);
140 }
141
142 for (i = 0; i < num_loops; i++) {
143 cur_size_in_dw = size_in_dw;
144 if (cur_size_in_dw > 0xFFFFF)
145 cur_size_in_dw = 0xFFFFF;
146 size_in_dw -= cur_size_in_dw;
147 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, cur_size_in_dw));
148 radeon_ring_write(ring, dst_offset & 0xfffffffc);
149 radeon_ring_write(ring, src_offset & 0xfffffffc);
150 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
151 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
152 src_offset += cur_size_in_dw * 4;
153 dst_offset += cur_size_in_dw * 4;
154 }
155
156 r = radeon_fence_emit(rdev, fence, ring->idx);
157 if (r) {
158 radeon_ring_unlock_undo(rdev, ring);
159 return r;
160 }
161
162 radeon_ring_unlock_commit(rdev, ring);
163 radeon_semaphore_free(rdev, &sem, *fence);
164
165 return r;
166}
167
168/**
169 * evergreen_dma_is_lockup - Check if the DMA engine is locked up
170 *
171 * @rdev: radeon_device pointer
172 * @ring: radeon_ring structure holding ring information
173 *
174 * Check if the async DMA engine is locked up.
175 * Returns true if the engine appears to be locked up, false if not.
176 */
177bool evergreen_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
178{
179 u32 reset_mask = evergreen_gpu_check_soft_reset(rdev);
180
181 if (!(reset_mask & RADEON_RESET_DMA)) {
182 radeon_ring_lockup_update(ring);
183 return false;
184 }
185 /* force ring activities */
186 radeon_ring_force_activity(rdev, ring);
187 return radeon_ring_test_lockup(rdev, ring);
188}
189
190
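
For scale, evergreen_copy_dma() above splits a transfer into COPY packets of at most 0xFFFFF dwords each and locks the ring for num_loops * 5 + 11 dwords. A small stand-alone sketch of that sizing, assuming the usual 4 KiB GPU page (RADEON_GPU_PAGE_SHIFT == 12) and a hypothetical buffer size:

#include <stdio.h>

#define GPU_PAGE_SHIFT  12      /* assumed 4 KiB GPU page, as in radeon */

int main(void)
{
        unsigned int num_gpu_pages = 2048;      /* hypothetical 8 MiB buffer */
        unsigned int size_in_dw = (num_gpu_pages << GPU_PAGE_SHIFT) / 4;
        unsigned int num_loops = (size_in_dw + 0xfffff - 1) / 0xfffff;  /* DIV_ROUND_UP */
        unsigned int ring_dw = num_loops * 5 + 11;  /* 5 dw per COPY + fence/sync overhead */

        printf("%u pages -> %u dwords -> %u COPY packets -> lock %u ring dwords\n",
               num_gpu_pages, size_in_dw, num_loops, ring_dw);
        return 0;
}
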
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 0205fa1594fa..2db8ce0023ac 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -174,6 +174,11 @@ extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
174extern void evergreen_program_aspm(struct radeon_device *rdev); 174extern void evergreen_program_aspm(struct radeon_device *rdev);
175extern void sumo_rlc_fini(struct radeon_device *rdev); 175extern void sumo_rlc_fini(struct radeon_device *rdev);
176extern int sumo_rlc_init(struct radeon_device *rdev); 176extern int sumo_rlc_init(struct radeon_device *rdev);
177extern void cayman_dma_vm_set_page(struct radeon_device *rdev,
178 struct radeon_ib *ib,
179 uint64_t pe,
180 uint64_t addr, unsigned count,
181 uint32_t incr, uint32_t flags);
177 182
178/* Firmware Names */ 183/* Firmware Names */
179MODULE_FIRMWARE("radeon/BARTS_pfp.bin"); 184MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
@@ -1595,186 +1600,7 @@ static int cayman_cp_resume(struct radeon_device *rdev)
1595 return 0; 1600 return 0;
1596} 1601}
1597 1602
1598/* 1603u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
1599 * DMA
1600 * Starting with R600, the GPU has an asynchronous
1601 * DMA engine. The programming model is very similar
1602 * to the 3D engine (ring buffer, IBs, etc.), but the
1603 * DMA controller has its own packet format that is
1604 * different from the PM4 format used by the 3D engine.
1605 * It supports copying data, writing embedded data,
1606 * solid fills, and a number of other things. It also
1607 * has support for tiling/detiling of buffers.
1608 * Cayman and newer support two asynchronous DMA engines.
1609 */
1610/**
1611 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
1612 *
1613 * @rdev: radeon_device pointer
1614 * @ib: IB object to schedule
1615 *
1616 * Schedule an IB in the DMA ring (cayman-SI).
1617 */
1618void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
1619 struct radeon_ib *ib)
1620{
1621 struct radeon_ring *ring = &rdev->ring[ib->ring];
1622
1623 if (rdev->wb.enabled) {
1624 u32 next_rptr = ring->wptr + 4;
1625 while ((next_rptr & 7) != 5)
1626 next_rptr++;
1627 next_rptr += 3;
1628 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
1629 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1630 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
1631 radeon_ring_write(ring, next_rptr);
1632 }
1633
1634 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
1635 * Pad as necessary with NOPs.
1636 */
1637 while ((ring->wptr & 7) != 5)
1638 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1639 radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
1640 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
1641 radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
1642
1643}
1644
1645/**
1646 * cayman_dma_stop - stop the async dma engines
1647 *
1648 * @rdev: radeon_device pointer
1649 *
1650 * Stop the async dma engines (cayman-SI).
1651 */
1652void cayman_dma_stop(struct radeon_device *rdev)
1653{
1654 u32 rb_cntl;
1655
1656 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1657
1658 /* dma0 */
1659 rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1660 rb_cntl &= ~DMA_RB_ENABLE;
1661 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1662
1663 /* dma1 */
1664 rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1665 rb_cntl &= ~DMA_RB_ENABLE;
1666 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1667
1668 rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1669 rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1670}
1671
1672/**
1673 * cayman_dma_resume - setup and start the async dma engines
1674 *
1675 * @rdev: radeon_device pointer
1676 *
1677 * Set up the DMA ring buffers and enable them. (cayman-SI).
1678 * Returns 0 for success, error for failure.
1679 */
1680int cayman_dma_resume(struct radeon_device *rdev)
1681{
1682 struct radeon_ring *ring;
1683 u32 rb_cntl, dma_cntl, ib_cntl;
1684 u32 rb_bufsz;
1685 u32 reg_offset, wb_offset;
1686 int i, r;
1687
1688 /* Reset dma */
1689 WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
1690 RREG32(SRBM_SOFT_RESET);
1691 udelay(50);
1692 WREG32(SRBM_SOFT_RESET, 0);
1693
1694 for (i = 0; i < 2; i++) {
1695 if (i == 0) {
1696 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1697 reg_offset = DMA0_REGISTER_OFFSET;
1698 wb_offset = R600_WB_DMA_RPTR_OFFSET;
1699 } else {
1700 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
1701 reg_offset = DMA1_REGISTER_OFFSET;
1702 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
1703 }
1704
1705 WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
1706 WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
1707
1708 /* Set ring buffer size in dwords */
1709 rb_bufsz = drm_order(ring->ring_size / 4);
1710 rb_cntl = rb_bufsz << 1;
1711#ifdef __BIG_ENDIAN
1712 rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
1713#endif
1714 WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
1715
1716 /* Initialize the ring buffer's read and write pointers */
1717 WREG32(DMA_RB_RPTR + reg_offset, 0);
1718 WREG32(DMA_RB_WPTR + reg_offset, 0);
1719
1720 /* set the wb address whether it's enabled or not */
1721 WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
1722 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
1723 WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
1724 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
1725
1726 if (rdev->wb.enabled)
1727 rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
1728
1729 WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
1730
1731 /* enable DMA IBs */
1732 ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
1733#ifdef __BIG_ENDIAN
1734 ib_cntl |= DMA_IB_SWAP_ENABLE;
1735#endif
1736 WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);
1737
1738 dma_cntl = RREG32(DMA_CNTL + reg_offset);
1739 dma_cntl &= ~CTXEMPTY_INT_ENABLE;
1740 WREG32(DMA_CNTL + reg_offset, dma_cntl);
1741
1742 ring->wptr = 0;
1743 WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
1744
1745 ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
1746
1747 WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
1748
1749 ring->ready = true;
1750
1751 r = radeon_ring_test(rdev, ring->idx, ring);
1752 if (r) {
1753 ring->ready = false;
1754 return r;
1755 }
1756 }
1757
1758 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
1759
1760 return 0;
1761}
1762
1763/**
1764 * cayman_dma_fini - tear down the async dma engines
1765 *
1766 * @rdev: radeon_device pointer
1767 *
1768 * Stop the async dma engines and free the rings (cayman-SI).
1769 */
1770void cayman_dma_fini(struct radeon_device *rdev)
1771{
1772 cayman_dma_stop(rdev);
1773 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
1774 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
1775}
1776
1777static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
1778{ 1604{
1779 u32 reset_mask = 0; 1605 u32 reset_mask = 0;
1780 u32 tmp; 1606 u32 tmp;
@@ -2027,34 +1853,6 @@ bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2027 return radeon_ring_test_lockup(rdev, ring); 1853 return radeon_ring_test_lockup(rdev, ring);
2028} 1854}
2029 1855
2030/**
2031 * cayman_dma_is_lockup - Check if the DMA engine is locked up
2032 *
2033 * @rdev: radeon_device pointer
2034 * @ring: radeon_ring structure holding ring information
2035 *
2036 * Check if the async DMA engine is locked up.
2037 * Returns true if the engine appears to be locked up, false if not.
2038 */
2039bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2040{
2041 u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
2042 u32 mask;
2043
2044 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
2045 mask = RADEON_RESET_DMA;
2046 else
2047 mask = RADEON_RESET_DMA1;
2048
2049 if (!(reset_mask & mask)) {
2050 radeon_ring_lockup_update(ring);
2051 return false;
2052 }
2053 /* force ring activities */
2054 radeon_ring_force_activity(rdev, ring);
2055 return radeon_ring_test_lockup(rdev, ring);
2056}
2057
2058static int cayman_startup(struct radeon_device *rdev) 1856static int cayman_startup(struct radeon_device *rdev)
2059{ 1857{
2060 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 1858 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -2658,61 +2456,7 @@ void cayman_vm_set_page(struct radeon_device *rdev,
2658 } 2456 }
2659 } 2457 }
2660 } else { 2458 } else {
2661 if ((flags & RADEON_VM_PAGE_SYSTEM) || 2459 cayman_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
2662 (count == 1)) {
2663 while (count) {
2664 ndw = count * 2;
2665 if (ndw > 0xFFFFE)
2666 ndw = 0xFFFFE;
2667
2668 /* for non-physically contiguous pages (system) */
2669 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
2670 ib->ptr[ib->length_dw++] = pe;
2671 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2672 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
2673 if (flags & RADEON_VM_PAGE_SYSTEM) {
2674 value = radeon_vm_map_gart(rdev, addr);
2675 value &= 0xFFFFFFFFFFFFF000ULL;
2676 } else if (flags & RADEON_VM_PAGE_VALID) {
2677 value = addr;
2678 } else {
2679 value = 0;
2680 }
2681 addr += incr;
2682 value |= r600_flags;
2683 ib->ptr[ib->length_dw++] = value;
2684 ib->ptr[ib->length_dw++] = upper_32_bits(value);
2685 }
2686 }
2687 while (ib->length_dw & 0x7)
2688 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
2689 } else {
2690 while (count) {
2691 ndw = count * 2;
2692 if (ndw > 0xFFFFE)
2693 ndw = 0xFFFFE;
2694
2695 if (flags & RADEON_VM_PAGE_VALID)
2696 value = addr;
2697 else
2698 value = 0;
2699 /* for physically contiguous pages (vram) */
2700 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
2701 ib->ptr[ib->length_dw++] = pe; /* dst addr */
2702 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2703 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
2704 ib->ptr[ib->length_dw++] = 0;
2705 ib->ptr[ib->length_dw++] = value; /* value */
2706 ib->ptr[ib->length_dw++] = upper_32_bits(value);
2707 ib->ptr[ib->length_dw++] = incr; /* increment size */
2708 ib->ptr[ib->length_dw++] = 0;
2709 pe += ndw * 4;
2710 addr += (ndw / 2) * incr;
2711 count -= ndw / 2;
2712 }
2713 }
2714 while (ib->length_dw & 0x7)
2715 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
2716 } 2460 }
2717} 2461}
2718 2462
@@ -2746,26 +2490,3 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2746 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 2490 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2747 radeon_ring_write(ring, 0x0); 2491 radeon_ring_write(ring, 0x0);
2748} 2492}
2749
2750void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2751{
2752 struct radeon_ring *ring = &rdev->ring[ridx];
2753
2754 if (vm == NULL)
2755 return;
2756
2757 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
2758 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
2759 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2760
2761 /* flush hdp cache */
2762 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
2763 radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
2764 radeon_ring_write(ring, 1);
2765
2766 /* bits 0-7 are the VM contexts0-7 */
2767 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
2768 radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
2769 radeon_ring_write(ring, 1 << vm->id);
2770}
2771
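
cayman_dma_resume() in the hunk above encodes the ring size into DMA_RB_CNTL as a power-of-two order of the dword count, shifted up by one bit. The sketch below assumes drm_order() behaves as a ceiling log2; treat the helper and the 64 KiB ring size as illustrative only:

#include <stdio.h>

/* assumed equivalent of drm_order(): smallest order with (1 << order) >= v */
static unsigned int ceil_log2(unsigned long v)
{
        unsigned int order = 0;

        while ((1UL << order) < v)
                order++;
        return order;
}

int main(void)
{
        unsigned long ring_size = 64 * 1024;            /* hypothetical 64 KiB ring */
        unsigned int rb_bufsz = ceil_log2(ring_size / 4);       /* ring size in dwords */
        unsigned int rb_cntl = rb_bufsz << 1;           /* size field starts at bit 1 */

        printf("%lu byte ring -> %lu dwords -> order %u -> DMA_RB_CNTL size bits 0x%x\n",
               ring_size, ring_size / 4, rb_bufsz, rb_cntl);
        return 0;
}
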
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c
new file mode 100644
index 000000000000..0f3c0baea4a6
--- /dev/null
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@@ -0,0 +1,338 @@
1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <drm/drmP.h>
25#include "radeon.h"
26#include "radeon_asic.h"
27#include "nid.h"
28
29u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);
30
31/*
32 * DMA
33 * Starting with R600, the GPU has an asynchronous
34 * DMA engine. The programming model is very similar
35 * to the 3D engine (ring buffer, IBs, etc.), but the
36 * DMA controller has its own packet format that is
37 * different from the PM4 format used by the 3D engine.
38 * It supports copying data, writing embedded data,
39 * solid fills, and a number of other things. It also
40 * has support for tiling/detiling of buffers.
41 * Cayman and newer support two asynchronous DMA engines.
42 */
43
44/**
45 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
46 *
47 * @rdev: radeon_device pointer
48 * @ib: IB object to schedule
49 *
50 * Schedule an IB in the DMA ring (cayman-SI).
51 */
52void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
53 struct radeon_ib *ib)
54{
55 struct radeon_ring *ring = &rdev->ring[ib->ring];
56
57 if (rdev->wb.enabled) {
58 u32 next_rptr = ring->wptr + 4;
59 while ((next_rptr & 7) != 5)
60 next_rptr++;
61 next_rptr += 3;
62 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
63 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
64 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
65 radeon_ring_write(ring, next_rptr);
66 }
67
68 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
69 * Pad as necessary with NOPs.
70 */
71 while ((ring->wptr & 7) != 5)
72 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
73 radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
74 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
75 radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
76
77}
78
79/**
80 * cayman_dma_stop - stop the async dma engines
81 *
82 * @rdev: radeon_device pointer
83 *
84 * Stop the async dma engines (cayman-SI).
85 */
86void cayman_dma_stop(struct radeon_device *rdev)
87{
88 u32 rb_cntl;
89
90 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
91
92 /* dma0 */
93 rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
94 rb_cntl &= ~DMA_RB_ENABLE;
95 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
96
97 /* dma1 */
98 rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
99 rb_cntl &= ~DMA_RB_ENABLE;
100 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
101
102 rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
103 rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
104}
105
106/**
107 * cayman_dma_resume - setup and start the async dma engines
108 *
109 * @rdev: radeon_device pointer
110 *
111 * Set up the DMA ring buffers and enable them. (cayman-SI).
112 * Returns 0 for success, error for failure.
113 */
114int cayman_dma_resume(struct radeon_device *rdev)
115{
116 struct radeon_ring *ring;
117 u32 rb_cntl, dma_cntl, ib_cntl;
118 u32 rb_bufsz;
119 u32 reg_offset, wb_offset;
120 int i, r;
121
122 /* Reset dma */
123 WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
124 RREG32(SRBM_SOFT_RESET);
125 udelay(50);
126 WREG32(SRBM_SOFT_RESET, 0);
127
128 for (i = 0; i < 2; i++) {
129 if (i == 0) {
130 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
131 reg_offset = DMA0_REGISTER_OFFSET;
132 wb_offset = R600_WB_DMA_RPTR_OFFSET;
133 } else {
134 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
135 reg_offset = DMA1_REGISTER_OFFSET;
136 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
137 }
138
139 WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
140 WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
141
142 /* Set ring buffer size in dwords */
143 rb_bufsz = drm_order(ring->ring_size / 4);
144 rb_cntl = rb_bufsz << 1;
145#ifdef __BIG_ENDIAN
146 rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
147#endif
148 WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
149
150 /* Initialize the ring buffer's read and write pointers */
151 WREG32(DMA_RB_RPTR + reg_offset, 0);
152 WREG32(DMA_RB_WPTR + reg_offset, 0);
153
154 /* set the wb address whether it's enabled or not */
155 WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
156 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
157 WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
158 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
159
160 if (rdev->wb.enabled)
161 rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
162
163 WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
164
165 /* enable DMA IBs */
166 ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
167#ifdef __BIG_ENDIAN
168 ib_cntl |= DMA_IB_SWAP_ENABLE;
169#endif
170 WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);
171
172 dma_cntl = RREG32(DMA_CNTL + reg_offset);
173 dma_cntl &= ~CTXEMPTY_INT_ENABLE;
174 WREG32(DMA_CNTL + reg_offset, dma_cntl);
175
176 ring->wptr = 0;
177 WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
178
179 ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
180
181 WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
182
183 ring->ready = true;
184
185 r = radeon_ring_test(rdev, ring->idx, ring);
186 if (r) {
187 ring->ready = false;
188 return r;
189 }
190 }
191
192 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
193
194 return 0;
195}
196
197/**
198 * cayman_dma_fini - tear down the async dma engines
199 *
200 * @rdev: radeon_device pointer
201 *
202 * Stop the async dma engines and free the rings (cayman-SI).
203 */
204void cayman_dma_fini(struct radeon_device *rdev)
205{
206 cayman_dma_stop(rdev);
207 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
208 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
209}
210
211/**
212 * cayman_dma_is_lockup - Check if the DMA engine is locked up
213 *
214 * @rdev: radeon_device pointer
215 * @ring: radeon_ring structure holding ring information
216 *
217 * Check if the async DMA engine is locked up.
218 * Returns true if the engine appears to be locked up, false if not.
219 */
220bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
221{
222 u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
223 u32 mask;
224
225 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
226 mask = RADEON_RESET_DMA;
227 else
228 mask = RADEON_RESET_DMA1;
229
230 if (!(reset_mask & mask)) {
231 radeon_ring_lockup_update(ring);
232 return false;
233 }
234 /* force ring activities */
235 radeon_ring_force_activity(rdev, ring);
236 return radeon_ring_test_lockup(rdev, ring);
237}
238
239/**
240 * cayman_dma_vm_set_page - update the page tables using the DMA
241 *
242 * @rdev: radeon_device pointer
243 * @ib: indirect buffer to fill with commands
244 * @pe: addr of the page entry
245 * @addr: dst addr to write into pe
246 * @count: number of page entries to update
247 * @incr: increase next addr by incr bytes
248 * @flags: access flags
249 * @r600_flags: hw access flags
250 *
251 * Update the page tables using the DMA (cayman/TN).
252 */
253void cayman_dma_vm_set_page(struct radeon_device *rdev,
254 struct radeon_ib *ib,
255 uint64_t pe,
256 uint64_t addr, unsigned count,
257 uint32_t incr, uint32_t flags)
258{
259 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
260 uint64_t value;
261 unsigned ndw;
262
263 if ((flags & RADEON_VM_PAGE_SYSTEM) || (count == 1)) {
264 while (count) {
265 ndw = count * 2;
266 if (ndw > 0xFFFFE)
267 ndw = 0xFFFFE;
268
269 /* for non-physically contiguous pages (system) */
270 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
271 ib->ptr[ib->length_dw++] = pe;
272 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
273 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
274 if (flags & RADEON_VM_PAGE_SYSTEM) {
275 value = radeon_vm_map_gart(rdev, addr);
276 value &= 0xFFFFFFFFFFFFF000ULL;
277 } else if (flags & RADEON_VM_PAGE_VALID) {
278 value = addr;
279 } else {
280 value = 0;
281 }
282 addr += incr;
283 value |= r600_flags;
284 ib->ptr[ib->length_dw++] = value;
285 ib->ptr[ib->length_dw++] = upper_32_bits(value);
286 }
287 }
288 } else {
289 while (count) {
290 ndw = count * 2;
291 if (ndw > 0xFFFFE)
292 ndw = 0xFFFFE;
293
294 if (flags & RADEON_VM_PAGE_VALID)
295 value = addr;
296 else
297 value = 0;
298 /* for physically contiguous pages (vram) */
299 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
300 ib->ptr[ib->length_dw++] = pe; /* dst addr */
301 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
302 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
303 ib->ptr[ib->length_dw++] = 0;
304 ib->ptr[ib->length_dw++] = value; /* value */
305 ib->ptr[ib->length_dw++] = upper_32_bits(value);
306 ib->ptr[ib->length_dw++] = incr; /* increment size */
307 ib->ptr[ib->length_dw++] = 0;
308 pe += ndw * 4;
309 addr += (ndw / 2) * incr;
310 count -= ndw / 2;
311 }
312 }
313 while (ib->length_dw & 0x7)
314 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
315}
316
317void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
318{
319 struct radeon_ring *ring = &rdev->ring[ridx];
320
321 if (vm == NULL)
322 return;
323
324 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
325 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
326 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
327
328 /* flush hdp cache */
329 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
330 radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
331 radeon_ring_write(ring, 1);
332
333 /* bits 0-7 are the VM contexts0-7 */
334 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
335 radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
336 radeon_ring_write(ring, 1 << vm->id);
337}
338
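
cayman_dma_vm_set_page() above uses two encodings: a DMA WRITE packet (3 header dwords plus two dwords per entry) for scattered system pages, and a fixed 9-dword PTE_PDE packet per chunk for physically contiguous VRAM ranges. A rough stand-alone sketch of the resulting IB sizes for a hypothetical entry count, ignoring the final padding to an 8 dword boundary:

#include <stdio.h>

int main(void)
{
        unsigned int count = 1000000;   /* hypothetical number of page table entries */
        unsigned int dw_write = 0, dw_pte_pde = 0, c, ndw;

        /* scattered (system) pages: WRITE packet, 3 header dwords + ndw payload dwords */
        for (c = count; c; c -= ndw / 2) {
                ndw = (c * 2 > 0xFFFFE) ? 0xFFFFE : c * 2;
                dw_write += 3 + ndw;
        }
        /* contiguous (vram) pages: one 9-dword PTE_PDE packet per chunk */
        for (c = count; c; c -= ndw / 2) {
                ndw = (c * 2 > 0xFFFFE) ? 0xFFFFE : c * 2;
                dw_pte_pde += 9;
        }
        printf("%u entries: ~%u IB dwords via WRITE, ~%u via PTE_PDE\n",
               count, dw_write, dw_pte_pde);
        return 0;
}
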
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 3a08ef92d33f..087cff444ba2 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1374,7 +1374,7 @@ static bool r600_is_display_hung(struct radeon_device *rdev)
1374 return true; 1374 return true;
1375} 1375}
1376 1376
1377static u32 r600_gpu_check_soft_reset(struct radeon_device *rdev) 1377u32 r600_gpu_check_soft_reset(struct radeon_device *rdev)
1378{ 1378{
1379 u32 reset_mask = 0; 1379 u32 reset_mask = 0;
1380 u32 tmp; 1380 u32 tmp;
@@ -1622,28 +1622,6 @@ bool r600_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1622 return radeon_ring_test_lockup(rdev, ring); 1622 return radeon_ring_test_lockup(rdev, ring);
1623} 1623}
1624 1624
1625/**
1626 * r600_dma_is_lockup - Check if the DMA engine is locked up
1627 *
1628 * @rdev: radeon_device pointer
1629 * @ring: radeon_ring structure holding ring information
1630 *
1631 * Check if the async DMA engine is locked up.
1632 * Returns true if the engine appears to be locked up, false if not.
1633 */
1634bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1635{
1636 u32 reset_mask = r600_gpu_check_soft_reset(rdev);
1637
1638 if (!(reset_mask & RADEON_RESET_DMA)) {
1639 radeon_ring_lockup_update(ring);
1640 return false;
1641 }
1642 /* force ring activities */
1643 radeon_ring_force_activity(rdev, ring);
1644 return radeon_ring_test_lockup(rdev, ring);
1645}
1646
1647u32 r6xx_remap_render_backend(struct radeon_device *rdev, 1625u32 r6xx_remap_render_backend(struct radeon_device *rdev,
1648 u32 tiling_pipe_num, 1626 u32 tiling_pipe_num,
1649 u32 max_rb_num, 1627 u32 max_rb_num,
@@ -2494,176 +2472,6 @@ void r600_cp_fini(struct radeon_device *rdev)
2494} 2472}
2495 2473
2496/* 2474/*
2497 * DMA
2498 * Starting with R600, the GPU has an asynchronous
2499 * DMA engine. The programming model is very similar
2500 * to the 3D engine (ring buffer, IBs, etc.), but the
2501 * DMA controller has its own packet format that is
2502 * different from the PM4 format used by the 3D engine.
2503 * It supports copying data, writing embedded data,
2504 * solid fills, and a number of other things. It also
2505 * has support for tiling/detiling of buffers.
2506 */
2507
2508/**
2509 * r600_dma_get_rptr - get the current read pointer
2510 *
2511 * @rdev: radeon_device pointer
2512 * @ring: radeon ring pointer
2513 *
2514 * Get the current rptr from the hardware (r6xx+).
2515 */
2516uint32_t r600_dma_get_rptr(struct radeon_device *rdev,
2517 struct radeon_ring *ring)
2518{
2519 return (radeon_ring_generic_get_rptr(rdev, ring) & 0x3fffc) >> 2;
2520}
2521
2522/**
2523 * r600_dma_get_wptr - get the current write pointer
2524 *
2525 * @rdev: radeon_device pointer
2526 * @ring: radeon ring pointer
2527 *
2528 * Get the current wptr from the hardware (r6xx+).
2529 */
2530uint32_t r600_dma_get_wptr(struct radeon_device *rdev,
2531 struct radeon_ring *ring)
2532{
2533 return (RREG32(ring->wptr_reg) & 0x3fffc) >> 2;
2534}
2535
2536/**
2537 * r600_dma_set_wptr - commit the write pointer
2538 *
2539 * @rdev: radeon_device pointer
2540 * @ring: radeon ring pointer
2541 *
2542 * Write the wptr back to the hardware (r6xx+).
2543 */
2544void r600_dma_set_wptr(struct radeon_device *rdev,
2545 struct radeon_ring *ring)
2546{
2547 WREG32(ring->wptr_reg, (ring->wptr << 2) & 0x3fffc);
2548}
2549
2550/**
2551 * r600_dma_stop - stop the async dma engine
2552 *
2553 * @rdev: radeon_device pointer
2554 *
2555 * Stop the async dma engine (r6xx-evergreen).
2556 */
2557void r600_dma_stop(struct radeon_device *rdev)
2558{
2559 u32 rb_cntl = RREG32(DMA_RB_CNTL);
2560
2561 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2562
2563 rb_cntl &= ~DMA_RB_ENABLE;
2564 WREG32(DMA_RB_CNTL, rb_cntl);
2565
2566 rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
2567}
2568
2569/**
2570 * r600_dma_resume - setup and start the async dma engine
2571 *
2572 * @rdev: radeon_device pointer
2573 *
2574 * Set up the DMA ring buffer and enable it. (r6xx-evergreen).
2575 * Returns 0 for success, error for failure.
2576 */
2577int r600_dma_resume(struct radeon_device *rdev)
2578{
2579 struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2580 u32 rb_cntl, dma_cntl, ib_cntl;
2581 u32 rb_bufsz;
2582 int r;
2583
2584 /* Reset dma */
2585 if (rdev->family >= CHIP_RV770)
2586 WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
2587 else
2588 WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
2589 RREG32(SRBM_SOFT_RESET);
2590 udelay(50);
2591 WREG32(SRBM_SOFT_RESET, 0);
2592
2593 WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
2594 WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
2595
2596 /* Set ring buffer size in dwords */
2597 rb_bufsz = drm_order(ring->ring_size / 4);
2598 rb_cntl = rb_bufsz << 1;
2599#ifdef __BIG_ENDIAN
2600 rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
2601#endif
2602 WREG32(DMA_RB_CNTL, rb_cntl);
2603
2604 /* Initialize the ring buffer's read and write pointers */
2605 WREG32(DMA_RB_RPTR, 0);
2606 WREG32(DMA_RB_WPTR, 0);
2607
2608 /* set the wb address whether it's enabled or not */
2609 WREG32(DMA_RB_RPTR_ADDR_HI,
2610 upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
2611 WREG32(DMA_RB_RPTR_ADDR_LO,
2612 ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
2613
2614 if (rdev->wb.enabled)
2615 rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
2616
2617 WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
2618
2619 /* enable DMA IBs */
2620 ib_cntl = DMA_IB_ENABLE;
2621#ifdef __BIG_ENDIAN
2622 ib_cntl |= DMA_IB_SWAP_ENABLE;
2623#endif
2624 WREG32(DMA_IB_CNTL, ib_cntl);
2625
2626 dma_cntl = RREG32(DMA_CNTL);
2627 dma_cntl &= ~CTXEMPTY_INT_ENABLE;
2628 WREG32(DMA_CNTL, dma_cntl);
2629
2630 if (rdev->family >= CHIP_RV770)
2631 WREG32(DMA_MODE, 1);
2632
2633 ring->wptr = 0;
2634 WREG32(DMA_RB_WPTR, ring->wptr << 2);
2635
2636 ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
2637
2638 WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
2639
2640 ring->ready = true;
2641
2642 r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
2643 if (r) {
2644 ring->ready = false;
2645 return r;
2646 }
2647
2648 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
2649
2650 return 0;
2651}
2652
2653/**
2654 * r600_dma_fini - tear down the async dma engine
2655 *
2656 * @rdev: radeon_device pointer
2657 *
2658 * Stop the async dma engine and free the ring (r6xx-evergreen).
2659 */
2660void r600_dma_fini(struct radeon_device *rdev)
2661{
2662 r600_dma_stop(rdev);
2663 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
2664}
2665
2666/*
2667 * GPU scratch registers helpers function. 2475 * GPU scratch registers helpers function.
2668 */ 2476 */
2669void r600_scratch_init(struct radeon_device *rdev) 2477void r600_scratch_init(struct radeon_device *rdev)
@@ -2718,60 +2526,6 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2718 return r; 2526 return r;
2719} 2527}
2720 2528
2721/**
2722 * r600_dma_ring_test - simple async dma engine test
2723 *
2724 * @rdev: radeon_device pointer
2725 * @ring: radeon_ring structure holding ring information
2726 *
2727 * Test the DMA engine by using it to write a
2728 * value to memory (r6xx-SI).
2729 * Returns 0 for success, error for failure.
2730 */
2731int r600_dma_ring_test(struct radeon_device *rdev,
2732 struct radeon_ring *ring)
2733{
2734 unsigned i;
2735 int r;
2736 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2737 u32 tmp;
2738
2739 if (!ptr) {
2740 DRM_ERROR("invalid vram scratch pointer\n");
2741 return -EINVAL;
2742 }
2743
2744 tmp = 0xCAFEDEAD;
2745 writel(tmp, ptr);
2746
2747 r = radeon_ring_lock(rdev, ring, 4);
2748 if (r) {
2749 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
2750 return r;
2751 }
2752 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
2753 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
2754 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
2755 radeon_ring_write(ring, 0xDEADBEEF);
2756 radeon_ring_unlock_commit(rdev, ring);
2757
2758 for (i = 0; i < rdev->usec_timeout; i++) {
2759 tmp = readl(ptr);
2760 if (tmp == 0xDEADBEEF)
2761 break;
2762 DRM_UDELAY(1);
2763 }
2764
2765 if (i < rdev->usec_timeout) {
2766 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2767 } else {
2768 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
2769 ring->idx, tmp);
2770 r = -EINVAL;
2771 }
2772 return r;
2773}
2774
2775/* 2529/*
2776 * CP fences/semaphores 2530 * CP fences/semaphores
2777 */ 2531 */
@@ -2839,59 +2593,6 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev,
2839 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); 2593 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
2840} 2594}
2841 2595
2842/*
2843 * DMA fences/semaphores
2844 */
2845
2846/**
2847 * r600_dma_fence_ring_emit - emit a fence on the DMA ring
2848 *
2849 * @rdev: radeon_device pointer
2850 * @fence: radeon fence object
2851 *
2852 * Add a DMA fence packet to the ring to write
2853 * the fence seq number and DMA trap packet to generate
2854 * an interrupt if needed (r6xx-r7xx).
2855 */
2856void r600_dma_fence_ring_emit(struct radeon_device *rdev,
2857 struct radeon_fence *fence)
2858{
2859 struct radeon_ring *ring = &rdev->ring[fence->ring];
2860 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2861
2862 /* write the fence */
2863 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
2864 radeon_ring_write(ring, addr & 0xfffffffc);
2865 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
2866 radeon_ring_write(ring, lower_32_bits(fence->seq));
2867 /* generate an interrupt */
2868 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
2869}
2870
2871/**
2872 * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
2873 *
2874 * @rdev: radeon_device pointer
2875 * @ring: radeon_ring structure holding ring information
2876 * @semaphore: radeon semaphore object
2877 * @emit_wait: wait or signal semaphore
2878 *
2879 * Add a DMA semaphore packet to the ring wait on or signal
2880 * other rings (r6xx-SI).
2881 */
2882void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
2883 struct radeon_ring *ring,
2884 struct radeon_semaphore *semaphore,
2885 bool emit_wait)
2886{
2887 u64 addr = semaphore->gpu_addr;
2888 u32 s = emit_wait ? 0 : 1;
2889
2890 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
2891 radeon_ring_write(ring, addr & 0xfffffffc);
2892 radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
2893}
2894
2895/** 2596/**
2896 * r600_copy_cpdma - copy pages using the CP DMA engine 2597 * r600_copy_cpdma - copy pages using the CP DMA engine
2897 * 2598 *
@@ -2976,80 +2677,6 @@ int r600_copy_cpdma(struct radeon_device *rdev,
2976 return r; 2677 return r;
2977} 2678}
2978 2679
2979/**
2980 * r600_copy_dma - copy pages using the DMA engine
2981 *
2982 * @rdev: radeon_device pointer
2983 * @src_offset: src GPU address
2984 * @dst_offset: dst GPU address
2985 * @num_gpu_pages: number of GPU pages to xfer
2986 * @fence: radeon fence object
2987 *
2988 * Copy GPU pages using the DMA engine (r6xx).
2989 * Used by the radeon ttm implementation to move pages if
2990 * registered as the asic copy callback.
2991 */
2992int r600_copy_dma(struct radeon_device *rdev,
2993 uint64_t src_offset, uint64_t dst_offset,
2994 unsigned num_gpu_pages,
2995 struct radeon_fence **fence)
2996{
2997 struct radeon_semaphore *sem = NULL;
2998 int ring_index = rdev->asic->copy.dma_ring_index;
2999 struct radeon_ring *ring = &rdev->ring[ring_index];
3000 u32 size_in_dw, cur_size_in_dw;
3001 int i, num_loops;
3002 int r = 0;
3003
3004 r = radeon_semaphore_create(rdev, &sem);
3005 if (r) {
3006 DRM_ERROR("radeon: moving bo (%d).\n", r);
3007 return r;
3008 }
3009
3010 size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
3011 num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
3012 r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
3013 if (r) {
3014 DRM_ERROR("radeon: moving bo (%d).\n", r);
3015 radeon_semaphore_free(rdev, &sem, NULL);
3016 return r;
3017 }
3018
3019 if (radeon_fence_need_sync(*fence, ring->idx)) {
3020 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3021 ring->idx);
3022 radeon_fence_note_sync(*fence, ring->idx);
3023 } else {
3024 radeon_semaphore_free(rdev, &sem, NULL);
3025 }
3026
3027 for (i = 0; i < num_loops; i++) {
3028 cur_size_in_dw = size_in_dw;
3029 if (cur_size_in_dw > 0xFFFE)
3030 cur_size_in_dw = 0xFFFE;
3031 size_in_dw -= cur_size_in_dw;
3032 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
3033 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3034 radeon_ring_write(ring, src_offset & 0xfffffffc);
3035 radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
3036 (upper_32_bits(src_offset) & 0xff)));
3037 src_offset += cur_size_in_dw * 4;
3038 dst_offset += cur_size_in_dw * 4;
3039 }
3040
3041 r = radeon_fence_emit(rdev, fence, ring->idx);
3042 if (r) {
3043 radeon_ring_unlock_undo(rdev, ring);
3044 return r;
3045 }
3046
3047 radeon_ring_unlock_commit(rdev, ring);
3048 radeon_semaphore_free(rdev, &sem, *fence);
3049
3050 return r;
3051}
3052
3053int r600_set_surface_reg(struct radeon_device *rdev, int reg, 2680int r600_set_surface_reg(struct radeon_device *rdev, int reg,
3054 uint32_t tiling_flags, uint32_t pitch, 2681 uint32_t tiling_flags, uint32_t pitch,
3055 uint32_t offset, uint32_t obj_size) 2682 uint32_t offset, uint32_t obj_size)
@@ -3409,104 +3036,6 @@ free_scratch:
3409 return r; 3036 return r;
3410} 3037}
3411 3038
3412/**
3413 * r600_dma_ib_test - test an IB on the DMA engine
3414 *
3415 * @rdev: radeon_device pointer
3416 * @ring: radeon_ring structure holding ring information
3417 *
3418 * Test a simple IB in the DMA ring (r6xx-SI).
3419 * Returns 0 on success, error on failure.
3420 */
3421int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3422{
3423 struct radeon_ib ib;
3424 unsigned i;
3425 int r;
3426 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3427 u32 tmp = 0;
3428
3429 if (!ptr) {
3430 DRM_ERROR("invalid vram scratch pointer\n");
3431 return -EINVAL;
3432 }
3433
3434 tmp = 0xCAFEDEAD;
3435 writel(tmp, ptr);
3436
3437 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3438 if (r) {
3439 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3440 return r;
3441 }
3442
3443 ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
3444 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3445 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
3446 ib.ptr[3] = 0xDEADBEEF;
3447 ib.length_dw = 4;
3448
3449 r = radeon_ib_schedule(rdev, &ib, NULL);
3450 if (r) {
3451 radeon_ib_free(rdev, &ib);
3452 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3453 return r;
3454 }
3455 r = radeon_fence_wait(ib.fence, false);
3456 if (r) {
3457 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3458 return r;
3459 }
3460 for (i = 0; i < rdev->usec_timeout; i++) {
3461 tmp = readl(ptr);
3462 if (tmp == 0xDEADBEEF)
3463 break;
3464 DRM_UDELAY(1);
3465 }
3466 if (i < rdev->usec_timeout) {
3467 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3468 } else {
3469 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3470 r = -EINVAL;
3471 }
3472 radeon_ib_free(rdev, &ib);
3473 return r;
3474}
3475
3476/**
3477 * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
3478 *
3479 * @rdev: radeon_device pointer
3480 * @ib: IB object to schedule
3481 *
3482 * Schedule an IB in the DMA ring (r6xx-r7xx).
3483 */
3484void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3485{
3486 struct radeon_ring *ring = &rdev->ring[ib->ring];
3487
3488 if (rdev->wb.enabled) {
3489 u32 next_rptr = ring->wptr + 4;
3490 while ((next_rptr & 7) != 5)
3491 next_rptr++;
3492 next_rptr += 3;
3493 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
3494 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3495 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
3496 radeon_ring_write(ring, next_rptr);
3497 }
3498
3499 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
3500 * Pad as necessary with NOPs.
3501 */
3502 while ((ring->wptr & 7) != 5)
3503 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
3504 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
3505 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
3506 radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
3507
3508}
3509
3510/* 3039/*
3511 * Interrupts 3040 * Interrupts
3512 * 3041 *
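
The r600_dma_get_rptr()/get_wptr()/set_wptr() helpers removed above (and re-added in r600_dma.c below) convert between the byte-offset value held in the ring pointer registers and the driver's dword-indexed pointers by masking with 0x3fffc and shifting by two. A tiny stand-alone sketch of that conversion with hypothetical values:

#include <stdio.h>

int main(void)
{
        unsigned int reg = 0x000120a4;  /* hypothetical DMA_RB_RPTR register value */
        unsigned int rptr_dw = (reg & 0x3fffc) >> 2;    /* byte offset -> dword index */
        unsigned int wptr_dw = 0x48a5;                  /* hypothetical dword write pointer */
        unsigned int wptr_reg = (wptr_dw << 2) & 0x3fffc;       /* dword index -> byte offset */

        printf("rptr reg 0x%08x -> dword %u; wptr dword %u -> reg 0x%08x\n",
               reg, rptr_dw, wptr_dw, wptr_reg);
        return 0;
}
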
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
new file mode 100644
index 000000000000..bff05576266b
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -0,0 +1,497 @@
1/*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <drm/drmP.h>
25#include "radeon.h"
26#include "radeon_asic.h"
27#include "r600d.h"
28
29u32 r600_gpu_check_soft_reset(struct radeon_device *rdev);
30
31/*
32 * DMA
33 * Starting with R600, the GPU has an asynchronous
34 * DMA engine. The programming model is very similar
35 * to the 3D engine (ring buffer, IBs, etc.), but the
36 * DMA controller has its own packet format that is
37 * different from the PM4 format used by the 3D engine.
38 * It supports copying data, writing embedded data,
39 * solid fills, and a number of other things. It also
40 * has support for tiling/detiling of buffers.
41 */
42
43/**
44 * r600_dma_get_rptr - get the current read pointer
45 *
46 * @rdev: radeon_device pointer
47 * @ring: radeon ring pointer
48 *
49 * Get the current rptr from the hardware (r6xx+).
50 */
51uint32_t r600_dma_get_rptr(struct radeon_device *rdev,
52 struct radeon_ring *ring)
53{
54 return (radeon_ring_generic_get_rptr(rdev, ring) & 0x3fffc) >> 2;
55}
56
57/**
58 * r600_dma_get_wptr - get the current write pointer
59 *
60 * @rdev: radeon_device pointer
61 * @ring: radeon ring pointer
62 *
63 * Get the current wptr from the hardware (r6xx+).
64 */
65uint32_t r600_dma_get_wptr(struct radeon_device *rdev,
66 struct radeon_ring *ring)
67{
68 return (RREG32(ring->wptr_reg) & 0x3fffc) >> 2;
69}
70
71/**
72 * r600_dma_set_wptr - commit the write pointer
73 *
74 * @rdev: radeon_device pointer
75 * @ring: radeon ring pointer
76 *
77 * Write the wptr back to the hardware (r6xx+).
78 */
79void r600_dma_set_wptr(struct radeon_device *rdev,
80 struct radeon_ring *ring)
81{
82 WREG32(ring->wptr_reg, (ring->wptr << 2) & 0x3fffc);
83}
84
85/**
86 * r600_dma_stop - stop the async dma engine
87 *
88 * @rdev: radeon_device pointer
89 *
90 * Stop the async dma engine (r6xx-evergreen).
91 */
92void r600_dma_stop(struct radeon_device *rdev)
93{
94 u32 rb_cntl = RREG32(DMA_RB_CNTL);
95
96 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
97
98 rb_cntl &= ~DMA_RB_ENABLE;
99 WREG32(DMA_RB_CNTL, rb_cntl);
100
101 rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
102}
103
104/**
105 * r600_dma_resume - setup and start the async dma engine
106 *
107 * @rdev: radeon_device pointer
108 *
109 * Set up the DMA ring buffer and enable it. (r6xx-evergreen).
110 * Returns 0 for success, error for failure.
111 */
112int r600_dma_resume(struct radeon_device *rdev)
113{
114 struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
115 u32 rb_cntl, dma_cntl, ib_cntl;
116 u32 rb_bufsz;
117 int r;
118
119 /* Reset dma */
120 if (rdev->family >= CHIP_RV770)
121 WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
122 else
123 WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
124 RREG32(SRBM_SOFT_RESET);
125 udelay(50);
126 WREG32(SRBM_SOFT_RESET, 0);
127
128 WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
129 WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
130
131 /* Set ring buffer size in dwords */
132 rb_bufsz = drm_order(ring->ring_size / 4);
133 rb_cntl = rb_bufsz << 1;
134#ifdef __BIG_ENDIAN
135 rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
136#endif
137 WREG32(DMA_RB_CNTL, rb_cntl);
138
139 /* Initialize the ring buffer's read and write pointers */
140 WREG32(DMA_RB_RPTR, 0);
141 WREG32(DMA_RB_WPTR, 0);
142
143 /* set the wb address whether it's enabled or not */
144 WREG32(DMA_RB_RPTR_ADDR_HI,
145 upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
146 WREG32(DMA_RB_RPTR_ADDR_LO,
147 ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
148
149 if (rdev->wb.enabled)
150 rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
151
152 WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
153
154 /* enable DMA IBs */
155 ib_cntl = DMA_IB_ENABLE;
156#ifdef __BIG_ENDIAN
157 ib_cntl |= DMA_IB_SWAP_ENABLE;
158#endif
159 WREG32(DMA_IB_CNTL, ib_cntl);
160
161 dma_cntl = RREG32(DMA_CNTL);
162 dma_cntl &= ~CTXEMPTY_INT_ENABLE;
163 WREG32(DMA_CNTL, dma_cntl);
164
165 if (rdev->family >= CHIP_RV770)
166 WREG32(DMA_MODE, 1);
167
168 ring->wptr = 0;
169 WREG32(DMA_RB_WPTR, ring->wptr << 2);
170
171 ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
172
173 WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
174
175 ring->ready = true;
176
177 r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
178 if (r) {
179 ring->ready = false;
180 return r;
181 }
182
183 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
184
185 return 0;
186}
187
188/**
189 * r600_dma_fini - tear down the async dma engine
190 *
191 * @rdev: radeon_device pointer
192 *
193 * Stop the async dma engine and free the ring (r6xx-evergreen).
194 */
195void r600_dma_fini(struct radeon_device *rdev)
196{
197 r600_dma_stop(rdev);
198 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
199}
200
201/**
202 * r600_dma_is_lockup - Check if the DMA engine is locked up
203 *
204 * @rdev: radeon_device pointer
205 * @ring: radeon_ring structure holding ring information
206 *
207 * Check if the async DMA engine is locked up.
208 * Returns true if the engine appears to be locked up, false if not.
209 */
210bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
211{
212 u32 reset_mask = r600_gpu_check_soft_reset(rdev);
213
214 if (!(reset_mask & RADEON_RESET_DMA)) {
215 radeon_ring_lockup_update(ring);
216 return false;
217 }
218 /* force ring activities */
219 radeon_ring_force_activity(rdev, ring);
220 return radeon_ring_test_lockup(rdev, ring);
221}
222
223
224/**
225 * r600_dma_ring_test - simple async dma engine test
226 *
227 * @rdev: radeon_device pointer
228 * @ring: radeon_ring structure holding ring information
229 *
230 * Test the DMA engine by using it to write a
231 * value to memory (r6xx-SI).
232 * Returns 0 for success, error for failure.
233 */
234int r600_dma_ring_test(struct radeon_device *rdev,
235 struct radeon_ring *ring)
236{
237 unsigned i;
238 int r;
239 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
240 u32 tmp;
241
242 if (!ptr) {
243 DRM_ERROR("invalid vram scratch pointer\n");
244 return -EINVAL;
245 }
246
247 tmp = 0xCAFEDEAD;
248 writel(tmp, ptr);
249
250 r = radeon_ring_lock(rdev, ring, 4);
251 if (r) {
252 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
253 return r;
254 }
255 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
256 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
257 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
258 radeon_ring_write(ring, 0xDEADBEEF);
259 radeon_ring_unlock_commit(rdev, ring);
260
261 for (i = 0; i < rdev->usec_timeout; i++) {
262 tmp = readl(ptr);
263 if (tmp == 0xDEADBEEF)
264 break;
265 DRM_UDELAY(1);
266 }
267
268 if (i < rdev->usec_timeout) {
269 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
270 } else {
271 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
272 ring->idx, tmp);
273 r = -EINVAL;
274 }
275 return r;
276}
277
278/**
279 * r600_dma_fence_ring_emit - emit a fence on the DMA ring
280 *
281 * @rdev: radeon_device pointer
282 * @fence: radeon fence object
283 *
284 * Add a DMA fence packet to the ring to write
285 * the fence seq number and a DMA trap packet to generate
286 * an interrupt if needed (r6xx-r7xx).
287 */
288void r600_dma_fence_ring_emit(struct radeon_device *rdev,
289 struct radeon_fence *fence)
290{
291 struct radeon_ring *ring = &rdev->ring[fence->ring];
292 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
293
294 /* write the fence */
295 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
296 radeon_ring_write(ring, addr & 0xfffffffc);
297 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
298 radeon_ring_write(ring, lower_32_bits(fence->seq));
299 /* generate an interrupt */
300 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
301}
302
303/**
304 * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
305 *
306 * @rdev: radeon_device pointer
307 * @ring: radeon_ring structure holding ring information
308 * @semaphore: radeon semaphore object
309 * @emit_wait: wait or signal semaphore
310 *
311 * Add a DMA semaphore packet to the ring to wait on or signal
312 * other rings (r6xx-SI).
313 */
314void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
315 struct radeon_ring *ring,
316 struct radeon_semaphore *semaphore,
317 bool emit_wait)
318{
319 u64 addr = semaphore->gpu_addr;
320 u32 s = emit_wait ? 0 : 1;
321
322 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
323 radeon_ring_write(ring, addr & 0xfffffffc);
324 radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
325}
326
327/**
328 * r600_dma_ib_test - test an IB on the DMA engine
329 *
330 * @rdev: radeon_device pointer
331 * @ring: radeon_ring structure holding ring information
332 *
333 * Test a simple IB in the DMA ring (r6xx-SI).
334 * Returns 0 on success, error on failure.
335 */
336int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
337{
338 struct radeon_ib ib;
339 unsigned i;
340 int r;
341 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
342 u32 tmp = 0;
343
344 if (!ptr) {
345 DRM_ERROR("invalid vram scratch pointer\n");
346 return -EINVAL;
347 }
348
349 tmp = 0xCAFEDEAD;
350 writel(tmp, ptr);
351
352 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
353 if (r) {
354 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
355 return r;
356 }
357
358 ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
359 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
360 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
361 ib.ptr[3] = 0xDEADBEEF;
362 ib.length_dw = 4;
363
364 r = radeon_ib_schedule(rdev, &ib, NULL);
365 if (r) {
366 radeon_ib_free(rdev, &ib);
367 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
368 return r;
369 }
370 r = radeon_fence_wait(ib.fence, false);
371 if (r) {
372 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
373 return r;
374 }
375 for (i = 0; i < rdev->usec_timeout; i++) {
376 tmp = readl(ptr);
377 if (tmp == 0xDEADBEEF)
378 break;
379 DRM_UDELAY(1);
380 }
381 if (i < rdev->usec_timeout) {
382 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
383 } else {
384 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
385 r = -EINVAL;
386 }
387 radeon_ib_free(rdev, &ib);
388 return r;
389}
390
391/**
392 * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
393 *
394 * @rdev: radeon_device pointer
395 * @ib: IB object to schedule
396 *
397 * Schedule an IB in the DMA ring (r6xx-r7xx).
398 */
399void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
400{
401 struct radeon_ring *ring = &rdev->ring[ib->ring];
402
403 if (rdev->wb.enabled) {
404 u32 next_rptr = ring->wptr + 4;
405 while ((next_rptr & 7) != 5)
406 next_rptr++;
407 next_rptr += 3;
408 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
409 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
410 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
411 radeon_ring_write(ring, next_rptr);
412 }
413
414 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
415 * Pad as necessary with NOPs.
416 */
417 while ((ring->wptr & 7) != 5)
418 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
419 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
420 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
421 radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
422
423}
424
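The padding rule in r600_dma_ring_ib_execute is easier to see with a little arithmetic: the INDIRECT_BUFFER packet is three dwords long, so pushing the write pointer to (wptr & 7) == 5 before emitting it makes the packet end exactly on an 8-dword boundary (5 + 3 = 8). A small standalone sketch of that calculation, not driver code:

#include <stdint.h>
#include <stdio.h>

/* Number of NOP dwords needed so that a 3-dword INDIRECT_BUFFER packet
 * emitted at the current write pointer ends on an 8-dword boundary,
 * i.e. the packet starts at (wptr & 7) == 5. Illustrative only. */
static unsigned ib_pad_nops(uint32_t wptr)
{
        unsigned nops = 0;

        while (((wptr + nops) & 7) != 5)
                nops++;
        return nops;
}

int main(void)
{
        for (uint32_t wptr = 0; wptr < 8; wptr++)
                printf("wptr %% 8 = %u -> %u NOPs\n", wptr & 7, ib_pad_nops(wptr));
        return 0;
}
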
425/**
426 * r600_copy_dma - copy pages using the DMA engine
427 *
428 * @rdev: radeon_device pointer
429 * @src_offset: src GPU address
430 * @dst_offset: dst GPU address
431 * @num_gpu_pages: number of GPU pages to xfer
432 * @fence: radeon fence object
433 *
434 * Copy GPU pages using the DMA engine (r6xx).
435 * Used by the radeon ttm implementation to move pages if
436 * registered as the asic copy callback.
437 */
438int r600_copy_dma(struct radeon_device *rdev,
439 uint64_t src_offset, uint64_t dst_offset,
440 unsigned num_gpu_pages,
441 struct radeon_fence **fence)
442{
443 struct radeon_semaphore *sem = NULL;
444 int ring_index = rdev->asic->copy.dma_ring_index;
445 struct radeon_ring *ring = &rdev->ring[ring_index];
446 u32 size_in_dw, cur_size_in_dw;
447 int i, num_loops;
448 int r = 0;
449
450 r = radeon_semaphore_create(rdev, &sem);
451 if (r) {
452 DRM_ERROR("radeon: moving bo (%d).\n", r);
453 return r;
454 }
455
456 size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
457 num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
458 r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
459 if (r) {
460 DRM_ERROR("radeon: moving bo (%d).\n", r);
461 radeon_semaphore_free(rdev, &sem, NULL);
462 return r;
463 }
464
465 if (radeon_fence_need_sync(*fence, ring->idx)) {
466 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
467 ring->idx);
468 radeon_fence_note_sync(*fence, ring->idx);
469 } else {
470 radeon_semaphore_free(rdev, &sem, NULL);
471 }
472
473 for (i = 0; i < num_loops; i++) {
474 cur_size_in_dw = size_in_dw;
475 if (cur_size_in_dw > 0xFFFE)
476 cur_size_in_dw = 0xFFFE;
477 size_in_dw -= cur_size_in_dw;
478 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
479 radeon_ring_write(ring, dst_offset & 0xfffffffc);
480 radeon_ring_write(ring, src_offset & 0xfffffffc);
481 radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
482 (upper_32_bits(src_offset) & 0xff)));
483 src_offset += cur_size_in_dw * 4;
484 dst_offset += cur_size_in_dw * 4;
485 }
486
487 r = radeon_fence_emit(rdev, fence, ring->idx);
488 if (r) {
489 radeon_ring_unlock_undo(rdev, ring);
490 return r;
491 }
492
493 radeon_ring_unlock_commit(rdev, ring);
494 radeon_semaphore_free(rdev, &sem, *fence);
495
496 return r;
497}
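
To sanity-check the ring sizing in r600_copy_dma: the page count is converted to dwords, split into chunks of at most 0xFFFE dwords, and each chunk costs a four-dword COPY packet, with eight extra dwords reserved for the optional semaphore wait and the fence. A self-contained sketch of that arithmetic, assuming the usual 4 KiB GPU page (RADEON_GPU_PAGE_SHIFT == 12):

#include <stdint.h>
#include <stdio.h>

#define GPU_PAGE_SHIFT  12              /* assumed 4 KiB GPU pages */
#define MAX_DW_PER_COPY 0xFFFE          /* r6xx DMA copy limit, in dwords */

int main(void)
{
        unsigned num_gpu_pages = 1024;  /* 4 MiB example transfer */
        uint32_t size_in_dw = (num_gpu_pages << GPU_PAGE_SHIFT) / 4;
        unsigned num_loops = (size_in_dw + MAX_DW_PER_COPY - 1) / MAX_DW_PER_COPY;
        unsigned ring_dw = num_loops * 4 + 8;   /* 4 dwords per COPY + overhead */

        printf("%u dwords -> %u COPY packets, %u ring dwords reserved\n",
               size_in_dw, num_loops, ring_dw);
        return 0;
}
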
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index aaab7b1bba27..b811296462a3 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -1650,80 +1650,6 @@ static int rv770_mc_init(struct radeon_device *rdev)
1650 return 0; 1650 return 0;
1651} 1651}
1652 1652
1653/**
1654 * rv770_copy_dma - copy pages using the DMA engine
1655 *
1656 * @rdev: radeon_device pointer
1657 * @src_offset: src GPU address
1658 * @dst_offset: dst GPU address
1659 * @num_gpu_pages: number of GPU pages to xfer
1660 * @fence: radeon fence object
1661 *
1662 * Copy GPU pages using the DMA engine (r7xx).
1663 * Used by the radeon ttm implementation to move pages if
1664 * registered as the asic copy callback.
1665 */
1666int rv770_copy_dma(struct radeon_device *rdev,
1667 uint64_t src_offset, uint64_t dst_offset,
1668 unsigned num_gpu_pages,
1669 struct radeon_fence **fence)
1670{
1671 struct radeon_semaphore *sem = NULL;
1672 int ring_index = rdev->asic->copy.dma_ring_index;
1673 struct radeon_ring *ring = &rdev->ring[ring_index];
1674 u32 size_in_dw, cur_size_in_dw;
1675 int i, num_loops;
1676 int r = 0;
1677
1678 r = radeon_semaphore_create(rdev, &sem);
1679 if (r) {
1680 DRM_ERROR("radeon: moving bo (%d).\n", r);
1681 return r;
1682 }
1683
1684 size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
1685 num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
1686 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
1687 if (r) {
1688 DRM_ERROR("radeon: moving bo (%d).\n", r);
1689 radeon_semaphore_free(rdev, &sem, NULL);
1690 return r;
1691 }
1692
1693 if (radeon_fence_need_sync(*fence, ring->idx)) {
1694 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
1695 ring->idx);
1696 radeon_fence_note_sync(*fence, ring->idx);
1697 } else {
1698 radeon_semaphore_free(rdev, &sem, NULL);
1699 }
1700
1701 for (i = 0; i < num_loops; i++) {
1702 cur_size_in_dw = size_in_dw;
1703 if (cur_size_in_dw > 0xFFFF)
1704 cur_size_in_dw = 0xFFFF;
1705 size_in_dw -= cur_size_in_dw;
1706 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
1707 radeon_ring_write(ring, dst_offset & 0xfffffffc);
1708 radeon_ring_write(ring, src_offset & 0xfffffffc);
1709 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
1710 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
1711 src_offset += cur_size_in_dw * 4;
1712 dst_offset += cur_size_in_dw * 4;
1713 }
1714
1715 r = radeon_fence_emit(rdev, fence, ring->idx);
1716 if (r) {
1717 radeon_ring_unlock_undo(rdev, ring);
1718 return r;
1719 }
1720
1721 radeon_ring_unlock_commit(rdev, ring);
1722 radeon_semaphore_free(rdev, &sem, *fence);
1723
1724 return r;
1725}
1726
1727static int rv770_startup(struct radeon_device *rdev) 1653static int rv770_startup(struct radeon_device *rdev)
1728{ 1654{
1729 struct radeon_ring *ring; 1655 struct radeon_ring *ring;
diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c
new file mode 100644
index 000000000000..f9b02e3d6830
--- /dev/null
+++ b/drivers/gpu/drm/radeon/rv770_dma.c
@@ -0,0 +1,101 @@
1/*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <drm/drmP.h>
25#include "radeon.h"
26#include "radeon_asic.h"
27#include "rv770d.h"
28
29/**
30 * rv770_copy_dma - copy pages using the DMA engine
31 *
32 * @rdev: radeon_device pointer
33 * @src_offset: src GPU address
34 * @dst_offset: dst GPU address
35 * @num_gpu_pages: number of GPU pages to xfer
36 * @fence: radeon fence object
37 *
38 * Copy GPU pages using the DMA engine (r7xx).
39 * Used by the radeon ttm implementation to move pages if
40 * registered as the asic copy callback.
41 */
42int rv770_copy_dma(struct radeon_device *rdev,
43 uint64_t src_offset, uint64_t dst_offset,
44 unsigned num_gpu_pages,
45 struct radeon_fence **fence)
46{
47 struct radeon_semaphore *sem = NULL;
48 int ring_index = rdev->asic->copy.dma_ring_index;
49 struct radeon_ring *ring = &rdev->ring[ring_index];
50 u32 size_in_dw, cur_size_in_dw;
51 int i, num_loops;
52 int r = 0;
53
54 r = radeon_semaphore_create(rdev, &sem);
55 if (r) {
56 DRM_ERROR("radeon: moving bo (%d).\n", r);
57 return r;
58 }
59
60 size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
61 num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
62 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
63 if (r) {
64 DRM_ERROR("radeon: moving bo (%d).\n", r);
65 radeon_semaphore_free(rdev, &sem, NULL);
66 return r;
67 }
68
69 if (radeon_fence_need_sync(*fence, ring->idx)) {
70 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
71 ring->idx);
72 radeon_fence_note_sync(*fence, ring->idx);
73 } else {
74 radeon_semaphore_free(rdev, &sem, NULL);
75 }
76
77 for (i = 0; i < num_loops; i++) {
78 cur_size_in_dw = size_in_dw;
79 if (cur_size_in_dw > 0xFFFF)
80 cur_size_in_dw = 0xFFFF;
81 size_in_dw -= cur_size_in_dw;
82 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
83 radeon_ring_write(ring, dst_offset & 0xfffffffc);
84 radeon_ring_write(ring, src_offset & 0xfffffffc);
85 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
86 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
87 src_offset += cur_size_in_dw * 4;
88 dst_offset += cur_size_in_dw * 4;
89 }
90
91 r = radeon_fence_emit(rdev, fence, ring->idx);
92 if (r) {
93 radeon_ring_unlock_undo(rdev, ring);
94 return r;
95 }
96
97 radeon_ring_unlock_commit(rdev, ring);
98 radeon_semaphore_free(rdev, &sem, *fence);
99
100 return r;
101}
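
Compared with r600_copy_dma earlier in this patch, the r7xx variant changes only the COPY packet shape: r6xx packs both upper address bytes into one trailing dword and is capped at 0xFFFE dwords per packet, while r7xx writes the two upper-address dwords separately and allows 0xFFFF dwords, which is why the reservation grows from num_loops * 4 + 8 to num_loops * 5 + 8. A hedged sketch of just the address-dword packing (the header dword is omitted, since the DMA_PACKET encoding is not reproduced here; 'emit' is a stand-in for radeon_ring_write()):

#include <stdint.h>
#include <stdio.h>

static void emit(const char *what, uint32_t v)
{
        printf("  %-22s 0x%08X\n", what, v);
}

static void copy_body_r6xx(uint64_t dst, uint64_t src)
{
        emit("dst low", (uint32_t)(dst & 0xFFFFFFFC));
        emit("src low", (uint32_t)(src & 0xFFFFFFFC));
        emit("packed dst/src high", (((uint32_t)(dst >> 32) & 0xFF) << 16) |
                                    ((uint32_t)(src >> 32) & 0xFF));
}

static void copy_body_r7xx(uint64_t dst, uint64_t src)
{
        emit("dst low", (uint32_t)(dst & 0xFFFFFFFC));
        emit("src low", (uint32_t)(src & 0xFFFFFFFC));
        emit("dst high", (uint32_t)(dst >> 32) & 0xFF);
        emit("src high", (uint32_t)(src >> 32) & 0xFF);
}

int main(void)
{
        uint64_t dst = 0x0000001230001000ULL, src = 0x0000004560002000ULL;

        printf("r6xx COPY body (3 dwords after header):\n");
        copy_body_r6xx(dst, src);
        printf("r7xx COPY body (4 dwords after header):\n");
        copy_body_r7xx(dst, src);
        return 0;
}
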
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index f3f79089405e..f5307e6bb92b 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -78,6 +78,11 @@ extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_
78extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev); 78extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
79extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev); 79extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
80extern bool evergreen_is_display_hung(struct radeon_device *rdev); 80extern bool evergreen_is_display_hung(struct radeon_device *rdev);
81extern void si_dma_vm_set_page(struct radeon_device *rdev,
82 struct radeon_ib *ib,
83 uint64_t pe,
84 uint64_t addr, unsigned count,
85 uint32_t incr, uint32_t flags);
81 86
82static const u32 verde_rlc_save_restore_register_list[] = 87static const u32 verde_rlc_save_restore_register_list[] =
83{ 88{
@@ -3495,7 +3500,7 @@ static int si_cp_resume(struct radeon_device *rdev)
3495 return 0; 3500 return 0;
3496} 3501}
3497 3502
3498static u32 si_gpu_check_soft_reset(struct radeon_device *rdev) 3503u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3499{ 3504{
3500 u32 reset_mask = 0; 3505 u32 reset_mask = 0;
3501 u32 tmp; 3506 u32 tmp;
@@ -3744,34 +3749,6 @@ bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3744 return radeon_ring_test_lockup(rdev, ring); 3749 return radeon_ring_test_lockup(rdev, ring);
3745} 3750}
3746 3751
3747/**
3748 * si_dma_is_lockup - Check if the DMA engine is locked up
3749 *
3750 * @rdev: radeon_device pointer
3751 * @ring: radeon_ring structure holding ring information
3752 *
3753 * Check if the async DMA engine is locked up.
3754 * Returns true if the engine appears to be locked up, false if not.
3755 */
3756bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3757{
3758 u32 reset_mask = si_gpu_check_soft_reset(rdev);
3759 u32 mask;
3760
3761 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3762 mask = RADEON_RESET_DMA;
3763 else
3764 mask = RADEON_RESET_DMA1;
3765
3766 if (!(reset_mask & mask)) {
3767 radeon_ring_lockup_update(ring);
3768 return false;
3769 }
3770 /* force ring activities */
3771 radeon_ring_force_activity(rdev, ring);
3772 return radeon_ring_test_lockup(rdev, ring);
3773}
3774
3775/* MC */ 3752/* MC */
3776static void si_mc_program(struct radeon_device *rdev) 3753static void si_mc_program(struct radeon_device *rdev)
3777{ 3754{
@@ -4710,58 +4687,7 @@ void si_vm_set_page(struct radeon_device *rdev,
4710 } 4687 }
4711 } else { 4688 } else {
4712 /* DMA */ 4689 /* DMA */
4713 if (flags & RADEON_VM_PAGE_SYSTEM) { 4690 si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4714 while (count) {
4715 ndw = count * 2;
4716 if (ndw > 0xFFFFE)
4717 ndw = 0xFFFFE;
4718
4719 /* for non-physically contiguous pages (system) */
4720 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
4721 ib->ptr[ib->length_dw++] = pe;
4722 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4723 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4724 if (flags & RADEON_VM_PAGE_SYSTEM) {
4725 value = radeon_vm_map_gart(rdev, addr);
4726 value &= 0xFFFFFFFFFFFFF000ULL;
4727 } else if (flags & RADEON_VM_PAGE_VALID) {
4728 value = addr;
4729 } else {
4730 value = 0;
4731 }
4732 addr += incr;
4733 value |= r600_flags;
4734 ib->ptr[ib->length_dw++] = value;
4735 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4736 }
4737 }
4738 } else {
4739 while (count) {
4740 ndw = count * 2;
4741 if (ndw > 0xFFFFE)
4742 ndw = 0xFFFFE;
4743
4744 if (flags & RADEON_VM_PAGE_VALID)
4745 value = addr;
4746 else
4747 value = 0;
4748 /* for physically contiguous pages (vram) */
4749 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4750 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4751 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4752 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4753 ib->ptr[ib->length_dw++] = 0;
4754 ib->ptr[ib->length_dw++] = value; /* value */
4755 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4756 ib->ptr[ib->length_dw++] = incr; /* increment size */
4757 ib->ptr[ib->length_dw++] = 0;
4758 pe += ndw * 4;
4759 addr += (ndw / 2) * incr;
4760 count -= ndw / 2;
4761 }
4762 }
4763 while (ib->length_dw & 0x7)
4764 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4765 } 4691 }
4766} 4692}
4767 4693
@@ -4808,32 +4734,6 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4808 radeon_ring_write(ring, 0x0); 4734 radeon_ring_write(ring, 0x0);
4809} 4735}
4810 4736
4811void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4812{
4813 struct radeon_ring *ring = &rdev->ring[ridx];
4814
4815 if (vm == NULL)
4816 return;
4817
4818 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4819 if (vm->id < 8) {
4820 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
4821 } else {
4822 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
4823 }
4824 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4825
4826 /* flush hdp cache */
4827 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4828 radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
4829 radeon_ring_write(ring, 1);
4830
4831 /* bits 0-7 are the VM contexts0-7 */
4832 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4833 radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
4834 radeon_ring_write(ring, 1 << vm->id);
4835}
4836
4837/* 4737/*
4838 * Power and clock gating 4738 * Power and clock gating
4839 */ 4739 */
@@ -6177,80 +6077,6 @@ restart_ih:
6177 return IRQ_HANDLED; 6077 return IRQ_HANDLED;
6178} 6078}
6179 6079
6180/**
6181 * si_copy_dma - copy pages using the DMA engine
6182 *
6183 * @rdev: radeon_device pointer
6184 * @src_offset: src GPU address
6185 * @dst_offset: dst GPU address
6186 * @num_gpu_pages: number of GPU pages to xfer
6187 * @fence: radeon fence object
6188 *
6189 * Copy GPU pages using the DMA engine (SI).
6190 * Used by the radeon ttm implementation to move pages if
6191 * registered as the asic copy callback.
6192 */
6193int si_copy_dma(struct radeon_device *rdev,
6194 uint64_t src_offset, uint64_t dst_offset,
6195 unsigned num_gpu_pages,
6196 struct radeon_fence **fence)
6197{
6198 struct radeon_semaphore *sem = NULL;
6199 int ring_index = rdev->asic->copy.dma_ring_index;
6200 struct radeon_ring *ring = &rdev->ring[ring_index];
6201 u32 size_in_bytes, cur_size_in_bytes;
6202 int i, num_loops;
6203 int r = 0;
6204
6205 r = radeon_semaphore_create(rdev, &sem);
6206 if (r) {
6207 DRM_ERROR("radeon: moving bo (%d).\n", r);
6208 return r;
6209 }
6210
6211 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
6212 num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
6213 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
6214 if (r) {
6215 DRM_ERROR("radeon: moving bo (%d).\n", r);
6216 radeon_semaphore_free(rdev, &sem, NULL);
6217 return r;
6218 }
6219
6220 if (radeon_fence_need_sync(*fence, ring->idx)) {
6221 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
6222 ring->idx);
6223 radeon_fence_note_sync(*fence, ring->idx);
6224 } else {
6225 radeon_semaphore_free(rdev, &sem, NULL);
6226 }
6227
6228 for (i = 0; i < num_loops; i++) {
6229 cur_size_in_bytes = size_in_bytes;
6230 if (cur_size_in_bytes > 0xFFFFF)
6231 cur_size_in_bytes = 0xFFFFF;
6232 size_in_bytes -= cur_size_in_bytes;
6233 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
6234 radeon_ring_write(ring, dst_offset & 0xffffffff);
6235 radeon_ring_write(ring, src_offset & 0xffffffff);
6236 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
6237 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
6238 src_offset += cur_size_in_bytes;
6239 dst_offset += cur_size_in_bytes;
6240 }
6241
6242 r = radeon_fence_emit(rdev, fence, ring->idx);
6243 if (r) {
6244 radeon_ring_unlock_undo(rdev, ring);
6245 return r;
6246 }
6247
6248 radeon_ring_unlock_commit(rdev, ring);
6249 radeon_semaphore_free(rdev, &sem, *fence);
6250
6251 return r;
6252}
6253
6254/* 6080/*
6255 * startup/shutdown callbacks 6081 * startup/shutdown callbacks
6256 */ 6082 */
diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c
new file mode 100644
index 000000000000..49909d23dfce
--- /dev/null
+++ b/drivers/gpu/drm/radeon/si_dma.c
@@ -0,0 +1,235 @@
1/*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <drm/drmP.h>
25#include "radeon.h"
26#include "radeon_asic.h"
27#include "sid.h"
28
29u32 si_gpu_check_soft_reset(struct radeon_device *rdev);
30
31/**
32 * si_dma_is_lockup - Check if the DMA engine is locked up
33 *
34 * @rdev: radeon_device pointer
35 * @ring: radeon_ring structure holding ring information
36 *
37 * Check if the async DMA engine is locked up.
38 * Returns true if the engine appears to be locked up, false if not.
39 */
40bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
41{
42 u32 reset_mask = si_gpu_check_soft_reset(rdev);
43 u32 mask;
44
45 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
46 mask = RADEON_RESET_DMA;
47 else
48 mask = RADEON_RESET_DMA1;
49
50 if (!(reset_mask & mask)) {
51 radeon_ring_lockup_update(ring);
52 return false;
53 }
54 /* force ring activities */
55 radeon_ring_force_activity(rdev, ring);
56 return radeon_ring_test_lockup(rdev, ring);
57}
58
59/**
60 * si_dma_vm_set_page - update the page tables using the DMA
61 *
62 * @rdev: radeon_device pointer
63 * @ib: indirect buffer to fill with commands
64 * @pe: addr of the page entry
65 * @addr: dst addr to write into pe
66 * @count: number of page entries to update
67 * @incr: increase next addr by incr bytes
68 * @flags: access flags
69 *
70 * Update the page tables using the DMA (SI).
71 */
72void si_dma_vm_set_page(struct radeon_device *rdev,
73 struct radeon_ib *ib,
74 uint64_t pe,
75 uint64_t addr, unsigned count,
76 uint32_t incr, uint32_t flags)
77{
78 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
79 uint64_t value;
80 unsigned ndw;
81
82 if (flags & RADEON_VM_PAGE_SYSTEM) {
83 while (count) {
84 ndw = count * 2;
85 if (ndw > 0xFFFFE)
86 ndw = 0xFFFFE;
87
88 /* for non-physically contiguous pages (system) */
89 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
90 ib->ptr[ib->length_dw++] = pe;
91 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
92 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
93 if (flags & RADEON_VM_PAGE_SYSTEM) {
94 value = radeon_vm_map_gart(rdev, addr);
95 value &= 0xFFFFFFFFFFFFF000ULL;
96 } else if (flags & RADEON_VM_PAGE_VALID) {
97 value = addr;
98 } else {
99 value = 0;
100 }
101 addr += incr;
102 value |= r600_flags;
103 ib->ptr[ib->length_dw++] = value;
104 ib->ptr[ib->length_dw++] = upper_32_bits(value);
105 }
106 }
107 } else {
108 while (count) {
109 ndw = count * 2;
110 if (ndw > 0xFFFFE)
111 ndw = 0xFFFFE;
112
113 if (flags & RADEON_VM_PAGE_VALID)
114 value = addr;
115 else
116 value = 0;
117 /* for physically contiguous pages (vram) */
118 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
119 ib->ptr[ib->length_dw++] = pe; /* dst addr */
120 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
121 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
122 ib->ptr[ib->length_dw++] = 0;
123 ib->ptr[ib->length_dw++] = value; /* value */
124 ib->ptr[ib->length_dw++] = upper_32_bits(value);
125 ib->ptr[ib->length_dw++] = incr; /* increment size */
126 ib->ptr[ib->length_dw++] = 0;
127 pe += ndw * 4;
128 addr += (ndw / 2) * incr;
129 count -= ndw / 2;
130 }
131 }
132 while (ib->length_dw & 0x7)
133 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
134}
135
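A note on the sizing in si_dma_vm_set_page: each page table entry is a 64-bit value written as two dwords, so ndw = count * 2 capped at 0xFFFFE per packet, and in the system-pages branch every entry is the GART-translated page address OR'ed with the hardware flags, with addr advancing by incr per entry. The sketch below mirrors that entry construction on the host, with a dummy translation standing in for radeon_vm_map_gart() and a made-up flag value:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for radeon_vm_map_gart(): pretend system pages are identity
 * mapped. Only here so the sketch is self-contained. */
static uint64_t fake_map_gart(uint64_t addr)
{
        return addr & 0xFFFFFFFFFFFFF000ULL;
}

int main(void)
{
        uint64_t addr = 0x100000;       /* first page address (example) */
        uint32_t incr = 0x1000;         /* 4 KiB per entry */
        uint64_t flags = 0x1;           /* assumed "valid" hardware flag */
        unsigned count = 4;
        unsigned ndw = count * 2;       /* two dwords per 64-bit PTE */

        if (ndw > 0xFFFFE)              /* per-packet limit from the code above */
                ndw = 0xFFFFE;

        for (unsigned i = 0; i < count; i++, addr += incr) {
                uint64_t value = fake_map_gart(addr) | flags;

                printf("pte[%u] lo=0x%08X hi=0x%08X\n", i,
                       (uint32_t)value, (uint32_t)(value >> 32));
        }
        printf("ndw for this packet: %u\n", ndw);
        return 0;
}
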
136void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
137{
138 struct radeon_ring *ring = &rdev->ring[ridx];
139
140 if (vm == NULL)
141 return;
142
143 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
144 if (vm->id < 8) {
145 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
146 } else {
147 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
148 }
149 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
150
151 /* flush hdp cache */
152 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
153 radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
154 radeon_ring_write(ring, 1);
155
156 /* bits 0-7 are the VM contexts0-7 */
157 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
158 radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
159 radeon_ring_write(ring, 1 << vm->id);
160}
161
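si_dma_vm_flush drives three register writes through the ring with SRBM_WRITE packets: the per-VM page-directory base (VM contexts 0-7 and 8-15 live in separate register banks, hence the vm->id < 8 split), an HDP cache flush, and a VM invalidate with the bit for this VM's context set. In every case the second dword combines the fixed (0xf << 16) field with the register offset expressed in dwords; a tiny sketch of that packing, using a made-up register offset rather than the real sid.h value:

#include <stdint.h>
#include <stdio.h>

/* Second dword of an SRBM_WRITE as used above: the fixed 0xf << 16 field
 * plus the register offset converted from bytes to dwords. */
static uint32_t srbm_write_dw1(uint32_t reg_byte_offset)
{
        return (0xf << 16) | (reg_byte_offset >> 2);
}

int main(void)
{
        uint32_t vm_invalidate_request = 0x51C; /* hypothetical offset */

        printf("dw1 = 0x%08X\n", srbm_write_dw1(vm_invalidate_request));
        return 0;
}
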
162/**
163 * si_copy_dma - copy pages using the DMA engine
164 *
165 * @rdev: radeon_device pointer
166 * @src_offset: src GPU address
167 * @dst_offset: dst GPU address
168 * @num_gpu_pages: number of GPU pages to xfer
169 * @fence: radeon fence object
170 *
171 * Copy GPU pages using the DMA engine (SI).
172 * Used by the radeon ttm implementation to move pages if
173 * registered as the asic copy callback.
174 */
175int si_copy_dma(struct radeon_device *rdev,
176 uint64_t src_offset, uint64_t dst_offset,
177 unsigned num_gpu_pages,
178 struct radeon_fence **fence)
179{
180 struct radeon_semaphore *sem = NULL;
181 int ring_index = rdev->asic->copy.dma_ring_index;
182 struct radeon_ring *ring = &rdev->ring[ring_index];
183 u32 size_in_bytes, cur_size_in_bytes;
184 int i, num_loops;
185 int r = 0;
186
187 r = radeon_semaphore_create(rdev, &sem);
188 if (r) {
189 DRM_ERROR("radeon: moving bo (%d).\n", r);
190 return r;
191 }
192
193 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
194 num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
195 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
196 if (r) {
197 DRM_ERROR("radeon: moving bo (%d).\n", r);
198 radeon_semaphore_free(rdev, &sem, NULL);
199 return r;
200 }
201
202 if (radeon_fence_need_sync(*fence, ring->idx)) {
203 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
204 ring->idx);
205 radeon_fence_note_sync(*fence, ring->idx);
206 } else {
207 radeon_semaphore_free(rdev, &sem, NULL);
208 }
209
210 for (i = 0; i < num_loops; i++) {
211 cur_size_in_bytes = size_in_bytes;
212 if (cur_size_in_bytes > 0xFFFFF)
213 cur_size_in_bytes = 0xFFFFF;
214 size_in_bytes -= cur_size_in_bytes;
215 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
216 radeon_ring_write(ring, dst_offset & 0xffffffff);
217 radeon_ring_write(ring, src_offset & 0xffffffff);
218 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
219 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
220 src_offset += cur_size_in_bytes;
221 dst_offset += cur_size_in_bytes;
222 }
223
224 r = radeon_fence_emit(rdev, fence, ring->idx);
225 if (r) {
226 radeon_ring_unlock_undo(rdev, ring);
227 return r;
228 }
229
230 radeon_ring_unlock_commit(rdev, ring);
231 radeon_semaphore_free(rdev, &sem, *fence);
232
233 return r;
234}
235
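
One last sizing note on si_copy_dma: unlike the r6xx/r7xx copies above, the SI packet takes its count in bytes (up to 0xFFFFF per packet) and carries the full low 32 bits of each address, so the loop bookkeeping stays byte-based and the reservation becomes num_loops * 5 + 11. A sketch of that chunking, parallel to the r6xx one earlier and again assuming 4 KiB GPU pages:

#include <stdint.h>
#include <stdio.h>

#define GPU_PAGE_SHIFT          12              /* assumed 4 KiB GPU pages */
#define MAX_BYTES_PER_COPY      0xFFFFF         /* SI DMA copy limit, in bytes */

int main(void)
{
        unsigned num_gpu_pages = 1024;          /* 4 MiB example transfer */
        uint32_t size_in_bytes = num_gpu_pages << GPU_PAGE_SHIFT;
        unsigned num_loops = (size_in_bytes + MAX_BYTES_PER_COPY - 1) / MAX_BYTES_PER_COPY;

        printf("%u bytes -> %u COPY packets, %u ring dwords reserved\n",
               size_in_bytes, num_loops, num_loops * 5 + 11);
        return 0;
}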