author    Alex Deucher <alexander.deucher@amd.com>  2012-12-04 15:27:33 -0500
committer Alex Deucher <alexander.deucher@amd.com>  2012-12-10 16:53:34 -0500
commit    f60cbd117a416830d5a7effc208eab8470a19167 (patch)
tree      0b4afcd96291498ff983eac850fc0171121ab645 /drivers/gpu/drm/radeon/ni.c
parent    233d1ad59a2895e348259bb6f9f4528a75ea7752 (diff)
drm/radeon/kms: Add initial support for async DMA on cayman/TN
There are 2 async DMA engines on cayman, one at 0xd000 and one at 0xd800. The programming interface is the same as on evergreen; however, there are some changes to the commands for using vmids.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
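The two engines share a single register layout: each per-engine register is programmed at a common address plus a per-engine offset, which is why the patch below writes every DMA register twice. A minimal standalone sketch of that addressing scheme follows; the offset and register values are illustrative assumptions based on the 0xd000/0xd800 bases quoted above, not the real nid.h definitions.

#include <stdio.h>

/* Assumed per-engine offsets matching the 0xd000/0xd800 bases above;
 * DMA_RB_CNTL_ADDR is a hypothetical register address for illustration. */
#define DMA0_REGISTER_OFFSET	0x0000
#define DMA1_REGISTER_OFFSET	0x0800
#define DMA_RB_CNTL_ADDR	0xd004

int main(void)
{
	unsigned int offsets[] = { DMA0_REGISTER_OFFSET, DMA1_REGISTER_OFFSET };
	int i;

	/* The driver loops over both engines and programs the same
	 * register once per engine, as cayman_dma_resume() does below. */
	for (i = 0; i < 2; i++)
		printf("dma%d: DMA_RB_CNTL at 0x%x\n", i,
		       DMA_RB_CNTL_ADDR + offsets[i]);
	return 0;
}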
Diffstat (limited to 'drivers/gpu/drm/radeon/ni.c')
-rw-r--r--  drivers/gpu/drm/radeon/ni.c | 272
1 file changed, 271 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 30c18a6e0044..b81aca44fd41 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -611,6 +611,8 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
+	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
 
 	tmp = gb_addr_config & NUM_PIPES_MASK;
 	tmp = r6xx_remap_render_backend(rdev, tmp,
@@ -915,6 +917,7 @@ static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
 		WREG32(SCRATCH_UMSK, 0);
+		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 	}
 }
 
@@ -1128,6 +1131,181 @@ static int cayman_cp_resume(struct radeon_device *rdev)
 	return 0;
 }
 
+/*
+ * DMA
+ * Starting with R600, the GPU has an asynchronous
+ * DMA engine. The programming model is very similar
+ * to the 3D engine (ring buffer, IBs, etc.), but the
+ * DMA controller has its own packet format that is
+ * different from the PM4 format used by the 3D engine.
+ * It supports copying data, writing embedded data,
+ * solid fills, and a number of other things. It also
+ * has support for tiling/detiling of buffers.
+ * Cayman and newer support two asynchronous DMA engines.
+ */
+/**
+ * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (cayman-SI).
+ */
+void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+				struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	if (rdev->wb.enabled) {
+		u32 next_rptr = ring->wptr + 4;
+		while ((next_rptr & 7) != 5)
+			next_rptr++;
+		next_rptr += 3;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+		radeon_ring_write(ring, next_rptr);
+	}
+
+	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+	 * Pad as necessary with NOPs.
+	 */
+	while ((ring->wptr & 7) != 5)
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
+	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
+/**
+ * cayman_dma_stop - stop the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines (cayman-SI).
+ */
+void cayman_dma_stop(struct radeon_device *rdev)
+{
+	u32 rb_cntl;
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+
+	/* dma0 */
+	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
+	rb_cntl &= ~DMA_RB_ENABLE;
+	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
+
+	/* dma1 */
+	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
+	rb_cntl &= ~DMA_RB_ENABLE;
+	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
+
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
+}
+
+/**
+ * cayman_dma_resume - setup and start the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the DMA ring buffers and enable them. (cayman-SI).
+ * Returns 0 for success, error for failure.
+ */
+int cayman_dma_resume(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring;
+	u32 rb_cntl, dma_cntl;
+	u32 rb_bufsz;
+	u32 reg_offset, wb_offset;
+	int i, r;
+
+	/* Reset dma */
+	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
+	RREG32(SRBM_SOFT_RESET);
+	udelay(50);
+	WREG32(SRBM_SOFT_RESET, 0);
+
+	for (i = 0; i < 2; i++) {
+		if (i == 0) {
+			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+			reg_offset = DMA0_REGISTER_OFFSET;
+			wb_offset = R600_WB_DMA_RPTR_OFFSET;
+		} else {
+			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+			reg_offset = DMA1_REGISTER_OFFSET;
+			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
+		}
+
+		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
+		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+
+		/* Set ring buffer size in dwords */
+		rb_bufsz = drm_order(ring->ring_size / 4);
+		rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
+
+		/* Initialize the ring buffer's read and write pointers */
+		WREG32(DMA_RB_RPTR + reg_offset, 0);
+		WREG32(DMA_RB_WPTR + reg_offset, 0);
+
+		/* set the wb address whether it's enabled or not */
+		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
+		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
+		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
+		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+
+		if (rdev->wb.enabled)
+			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+
+		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
+
+		/* enable DMA IBs */
+		WREG32(DMA_IB_CNTL + reg_offset, DMA_IB_ENABLE | CMD_VMID_FORCE);
+
+		dma_cntl = RREG32(DMA_CNTL + reg_offset);
+		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+		WREG32(DMA_CNTL + reg_offset, dma_cntl);
+
+		ring->wptr = 0;
+		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
+
+		ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
+
+		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
+
+		ring->ready = true;
+
+		r = radeon_ring_test(rdev, ring->idx, ring);
+		if (r) {
+			ring->ready = false;
+			return r;
+		}
+	}
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+	return 0;
+}
+
+/**
+ * cayman_dma_fini - tear down the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines and free the rings (cayman-SI).
+ */
+void cayman_dma_fini(struct radeon_device *rdev)
+{
+	cayman_dma_stop(rdev);
+	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
+}
+
 static int cayman_gpu_soft_reset(struct radeon_device *rdev)
 {
 	struct evergreen_mc_save save;
@@ -1218,6 +1396,32 @@ int cayman_asic_reset(struct radeon_device *rdev)
 	return cayman_gpu_soft_reset(rdev);
 }
 
+/**
+ * cayman_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up (cayman-SI).
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	u32 dma_status_reg;
+
+	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+		dma_status_reg = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
+	else
+		dma_status_reg = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
+	if (dma_status_reg & DMA_IDLE) {
+		radeon_ring_lockup_update(ring);
+		return false;
+	}
+	/* force ring activities */
+	radeon_ring_force_activity(rdev, ring);
+	return radeon_ring_test_lockup(rdev, ring);
+}
+
 static int cayman_startup(struct radeon_device *rdev)
 {
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -1299,6 +1503,18 @@ static int cayman_startup(struct radeon_device *rdev)
 		return r;
 	}
 
+	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
+	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -1313,6 +1529,23 @@ static int cayman_startup(struct radeon_device *rdev)
 			     0, 0xfffff, RADEON_CP_PACKET2);
 	if (r)
 		return r;
+
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
+			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	if (r)
+		return r;
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
+			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
+			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	if (r)
+		return r;
+
 	r = cayman_cp_load_microcode(rdev);
 	if (r)
 		return r;
@@ -1320,6 +1553,10 @@ static int cayman_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = cayman_dma_resume(rdev);
+	if (r)
+		return r;
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1364,7 +1601,7 @@ int cayman_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
 	cayman_cp_enable(rdev, false);
-	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+	cayman_dma_stop(rdev);
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	cayman_pcie_gart_disable(rdev);
@@ -1431,6 +1668,14 @@ int cayman_init(struct radeon_device *rdev)
 	ring->ring_obj = NULL;
 	r600_ring_init(rdev, ring, 1024 * 1024);
 
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	ring->ring_obj = NULL;
+	r600_ring_init(rdev, ring, 64 * 1024);
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+	ring->ring_obj = NULL;
+	r600_ring_init(rdev, ring, 64 * 1024);
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -1443,6 +1688,7 @@ int cayman_init(struct radeon_device *rdev)
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		cayman_cp_fini(rdev);
+		cayman_dma_fini(rdev);
 		r600_irq_fini(rdev);
 		if (rdev->flags & RADEON_IS_IGP)
 			si_rlc_fini(rdev);
@@ -1473,6 +1719,7 @@ void cayman_fini(struct radeon_device *rdev)
 {
 	r600_blit_fini(rdev);
 	cayman_cp_fini(rdev);
+	cayman_dma_fini(rdev);
 	r600_irq_fini(rdev);
 	if (rdev->flags & RADEON_IS_IGP)
 		si_rlc_fini(rdev);
@@ -1606,3 +1853,26 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 	radeon_ring_write(ring, 0x0);
 }
+
+void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+{
+	struct radeon_ring *ring = &rdev->ring[ridx];
+
+	if (vm == NULL)
+		return;
+
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+
+	/* flush hdp cache */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+	radeon_ring_write(ring, 1);
+
+	/* bits 0-7 are the VM contexts0-7 */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
+	radeon_ring_write(ring, 1 << vm->id);
+}
+
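The padding rule in cayman_dma_ring_ib_execute() above is worth a short illustration: the INDIRECT_BUFFER packet is three dwords long, so padding with NOPs until the write pointer sits at offset 5 (mod 8) makes the packet end exactly on an 8-dword boundary. A standalone sketch of that arithmetic follows; pad_for_ib_packet is a hypothetical helper written for illustration, not part of the patch.

#include <stdio.h>
#include <stdint.h>

/* Hypothetical helper (not part of the patch) mirroring the
 * "(wptr & 7) != 5" loop in cayman_dma_ring_ib_execute(): pad with
 * NOPs until the write pointer is at offset 5 mod 8, so the 3-dword
 * INDIRECT_BUFFER packet that follows ends on an 8-dword boundary. */
static uint32_t pad_for_ib_packet(uint32_t wptr, uint32_t *nops)
{
	*nops = 0;
	while ((wptr & 7) != 5) {
		wptr++;
		(*nops)++;
	}
	return wptr;
}

int main(void)
{
	uint32_t nops;
	uint32_t wptr = pad_for_ib_packet(11, &nops);

	/* 11 -> 13 after 2 NOPs; the IB packet fills dwords 13..15 and
	 * the write pointer lands on 16, an 8-dword boundary. */
	printf("padded to %u with %u NOPs, wptr after IB packet: %u\n",
	       wptr, nops, wptr + 3);
	return 0;
}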