author		Alex Deucher <alexander.deucher@amd.com>	2012-12-04 15:27:33 -0500
committer	Alex Deucher <alexander.deucher@amd.com>	2012-12-10 16:53:34 -0500
commit		f60cbd117a416830d5a7effc208eab8470a19167
tree		0b4afcd96291498ff983eac850fc0171121ab645
parent		233d1ad59a2895e348259bb6f9f4528a75ea7752
drm/radeon/kms: Add initial support for async DMA on cayman/TN
There are two async DMA engines on cayman: one at 0xd000 and one at 0xd800.
The programming interface is the same as on evergreen; however, there are
some changes to the commands for using vmids.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
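Both engines expose an identical register file; the second instance simply
sits 0x800 bytes above the first (0xd800 vs. 0xd000), which is why the patch
programs them through per-engine offsets rather than duplicating register
definitions. A minimal sketch of that addressing scheme, using only the
constants this patch adds to nid.h (the dma_wreg32 helper is hypothetical,
for illustration only):

/* Both async DMA engines share one register layout; engine 1's copy
 * lives 0x800 bytes above engine 0's (0xd800 vs. 0xd000). */
#define DMA0_REGISTER_OFFSET	0x0	/* not a register */
#define DMA1_REGISTER_OFFSET	0x800	/* not a register */
#define DMA_RB_CNTL		0xd000

/* Hypothetical helper (illustration only): program the same register
 * on either engine by adding the per-engine offset. */
static void dma_wreg32(struct radeon_device *rdev, int engine,
		       u32 reg, u32 val)
{
	u32 off = engine ? DMA1_REGISTER_OFFSET : DMA0_REGISTER_OFFSET;

	WREG32(reg + off, val);	/* e.g. DMA_RB_CNTL + off */
}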
 drivers/gpu/drm/radeon/evergreen.c   |  21 ++
 drivers/gpu/drm/radeon/evergreend.h  |   2 +
 drivers/gpu/drm/radeon/ni.c          | 272 +++++++++++++++++++++++++-
 drivers/gpu/drm/radeon/nid.h         |  75 +++++++
 drivers/gpu/drm/radeon/radeon.h      |   5 +-
 drivers/gpu/drm/radeon/radeon_asic.c |  48 ++++-
 drivers/gpu/drm/radeon/radeon_asic.h |   4 +
 7 files changed, 420 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 68206df3d5d2..c66251e4a9b9 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2404,6 +2404,8 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev)
 					 CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
 		cayman_cp_int_cntl_setup(rdev, 1, 0);
 		cayman_cp_int_cntl_setup(rdev, 2, 0);
+		tmp = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
+		WREG32(CAYMAN_DMA1_CNTL, tmp);
 	} else
 		WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
 	tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
@@ -2460,7 +2462,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
 	u32 grbm_int_cntl = 0;
 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
 	u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0;
-	u32 dma_cntl;
+	u32 dma_cntl, dma_cntl1 = 0;
 
 	if (!rdev->irq.installed) {
 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -2517,6 +2519,14 @@ int evergreen_irq_set(struct radeon_device *rdev)
 		dma_cntl |= TRAP_ENABLE;
 	}
 
+	if (rdev->family >= CHIP_CAYMAN) {
+		dma_cntl1 = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
+		if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
+			DRM_DEBUG("r600_irq_set: sw int dma1\n");
+			dma_cntl1 |= TRAP_ENABLE;
+		}
+	}
+
 	if (rdev->irq.crtc_vblank_int[0] ||
 	    atomic_read(&rdev->irq.pflip[0])) {
 		DRM_DEBUG("evergreen_irq_set: vblank 0\n");
@@ -2605,6 +2615,9 @@ int evergreen_irq_set(struct radeon_device *rdev)
 
 	WREG32(DMA_CNTL, dma_cntl);
 
+	if (rdev->family >= CHIP_CAYMAN)
+		WREG32(CAYMAN_DMA1_CNTL, dma_cntl1);
+
 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
 
 	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3147,6 +3160,12 @@ restart_ih:
 		case 233: /* GUI IDLE */
 			DRM_DEBUG("IH: GUI idle\n");
 			break;
+		case 244: /* DMA trap event */
+			if (rdev->family >= CHIP_CAYMAN) {
+				DRM_DEBUG("IH: DMA1 trap\n");
+				radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+			}
+			break;
 		default:
 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
 			break;
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 92d1f4521caf..7b4a650e33b2 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -918,6 +918,8 @@
 #	define CTXEMPTY_INT_ENABLE			(1 << 28)
 #define DMA_TILING_CONFIG  				0xD0B8
 
+#define CAYMAN_DMA1_CNTL				0xd82c
+
 /* async DMA packets */
 #define DMA_PACKET(cmd, t, s, n)	((((cmd) & 0xF) << 28) |	\
 					 (((t) & 0x1) << 23) |		\
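The DMA_PACKET header layout puts the opcode in bits 31:28, the tiling and
swap flags in bits 23 and 22, and a 20-bit dword count in the low bits. A
self-contained check of the encoding, assuming only the macro text above
(the uint32_t cast is added here so the shift is well-defined outside the
kernel build):

#include <assert.h>
#include <stdint.h>

#define DMA_PACKET(cmd, t, s, n)	(((uint32_t)((cmd) & 0xF) << 28) |	\
					 (((t) & 0x1) << 23) |			\
					 (((s) & 0x1) << 22) |			\
					 (((n) & 0xFFFFF) << 0))
#define DMA_PACKET_WRITE	0x2
#define DMA_PACKET_NOP		0xf

int main(void)
{
	/* "write 1 dword of embedded data", as emitted for the rptr
	 * update in cayman_dma_ring_ib_execute below */
	assert(DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1) == 0x20000001u);
	/* NOP header used to pad the ring to alignment */
	assert(DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0) == 0xf0000000u);
	return 0;
}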
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 30c18a6e0044..b81aca44fd41 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -611,6 +611,8 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
+	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
 
 	tmp = gb_addr_config & NUM_PIPES_MASK;
 	tmp = r6xx_remap_render_backend(rdev, tmp,
@@ -915,6 +917,7 @@ static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
 		WREG32(SCRATCH_UMSK, 0);
+		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 	}
 }
 
@@ -1128,6 +1131,181 @@ static int cayman_cp_resume(struct radeon_device *rdev)
 	return 0;
 }
 
+/*
+ * DMA
+ * Starting with R600, the GPU has an asynchronous
+ * DMA engine.  The programming model is very similar
+ * to the 3D engine (ring buffer, IBs, etc.), but the
+ * DMA controller has its own packet format that is
+ * different from the PM4 format used by the 3D engine.
+ * It supports copying data, writing embedded data,
+ * solid fills, and a number of other things.  It also
+ * has support for tiling/detiling of buffers.
+ * Cayman and newer support two asynchronous DMA engines.
+ */
+/**
+ * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (cayman-SI).
+ */
+void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+				struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	if (rdev->wb.enabled) {
+		u32 next_rptr = ring->wptr + 4;
+		while ((next_rptr & 7) != 5)
+			next_rptr++;
+		next_rptr += 3;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+		radeon_ring_write(ring, next_rptr);
+	}
+
+	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+	 * Pad as necessary with NOPs.
+	 */
+	while ((ring->wptr & 7) != 5)
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
+	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+}
+
+/**
+ * cayman_dma_stop - stop the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines (cayman-SI).
+ */
+void cayman_dma_stop(struct radeon_device *rdev)
+{
+	u32 rb_cntl;
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+
+	/* dma0 */
+	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
+	rb_cntl &= ~DMA_RB_ENABLE;
+	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
+
+	/* dma1 */
+	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
+	rb_cntl &= ~DMA_RB_ENABLE;
+	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
+
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
+}
+
+/**
+ * cayman_dma_resume - setup and start the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the DMA ring buffers and enable them (cayman-SI).
+ * Returns 0 for success, error for failure.
+ */
+int cayman_dma_resume(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring;
+	u32 rb_cntl, dma_cntl;
+	u32 rb_bufsz;
+	u32 reg_offset, wb_offset;
+	int i, r;
+
+	/* Reset dma */
+	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
+	RREG32(SRBM_SOFT_RESET);
+	udelay(50);
+	WREG32(SRBM_SOFT_RESET, 0);
+
+	for (i = 0; i < 2; i++) {
+		if (i == 0) {
+			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+			reg_offset = DMA0_REGISTER_OFFSET;
+			wb_offset = R600_WB_DMA_RPTR_OFFSET;
+		} else {
+			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+			reg_offset = DMA1_REGISTER_OFFSET;
+			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
+		}
+
+		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
+		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+
+		/* Set ring buffer size in dwords */
+		rb_bufsz = drm_order(ring->ring_size / 4);
+		rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
+
+		/* Initialize the ring buffer's read and write pointers */
+		WREG32(DMA_RB_RPTR + reg_offset, 0);
+		WREG32(DMA_RB_WPTR + reg_offset, 0);
+
+		/* set the wb address whether it's enabled or not */
+		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
+		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
+		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
+		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+
+		if (rdev->wb.enabled)
+			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+
+		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
+
+		/* enable DMA IBs */
+		WREG32(DMA_IB_CNTL + reg_offset, DMA_IB_ENABLE | CMD_VMID_FORCE);
+
+		dma_cntl = RREG32(DMA_CNTL + reg_offset);
+		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+		WREG32(DMA_CNTL + reg_offset, dma_cntl);
+
+		ring->wptr = 0;
+		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
+
+		ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
+
+		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
+
+		ring->ready = true;
+
+		r = radeon_ring_test(rdev, ring->idx, ring);
+		if (r) {
+			ring->ready = false;
+			return r;
+		}
+	}
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+	return 0;
+}
+
+/**
+ * cayman_dma_fini - tear down the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines and free the rings (cayman-SI).
+ */
+void cayman_dma_fini(struct radeon_device *rdev)
+{
+	cayman_dma_stop(rdev);
+	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
+}
+
 static int cayman_gpu_soft_reset(struct radeon_device *rdev)
 {
 	struct evergreen_mc_save save;
@@ -1218,6 +1396,32 @@ int cayman_asic_reset(struct radeon_device *rdev)
 	return cayman_gpu_soft_reset(rdev);
 }
 
+/**
+ * cayman_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up (cayman-SI).
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	u32 dma_status_reg;
+
+	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+		dma_status_reg = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
+	else
+		dma_status_reg = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
+	if (dma_status_reg & DMA_IDLE) {
+		radeon_ring_lockup_update(ring);
+		return false;
+	}
+	/* force ring activities */
+	radeon_ring_force_activity(rdev, ring);
+	return radeon_ring_test_lockup(rdev, ring);
+}
+
 static int cayman_startup(struct radeon_device *rdev)
 {
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -1299,6 +1503,18 @@ static int cayman_startup(struct radeon_device *rdev)
 		return r;
 	}
 
+	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
+	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -1313,6 +1529,23 @@ static int cayman_startup(struct radeon_device *rdev)
 			     0, 0xfffff, RADEON_CP_PACKET2);
 	if (r)
 		return r;
+
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
+			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	if (r)
+		return r;
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
+			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
+			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	if (r)
+		return r;
+
 	r = cayman_cp_load_microcode(rdev);
 	if (r)
 		return r;
@@ -1320,6 +1553,10 @@ static int cayman_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = cayman_dma_resume(rdev);
+	if (r)
+		return r;
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1364,7 +1601,7 @@ int cayman_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
 	cayman_cp_enable(rdev, false);
-	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+	cayman_dma_stop(rdev);
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	cayman_pcie_gart_disable(rdev);
@@ -1431,6 +1668,14 @@ int cayman_init(struct radeon_device *rdev)
 	ring->ring_obj = NULL;
 	r600_ring_init(rdev, ring, 1024 * 1024);
 
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	ring->ring_obj = NULL;
+	r600_ring_init(rdev, ring, 64 * 1024);
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+	ring->ring_obj = NULL;
+	r600_ring_init(rdev, ring, 64 * 1024);
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -1443,6 +1688,7 @@ int cayman_init(struct radeon_device *rdev)
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		cayman_cp_fini(rdev);
+		cayman_dma_fini(rdev);
 		r600_irq_fini(rdev);
 		if (rdev->flags & RADEON_IS_IGP)
 			si_rlc_fini(rdev);
@@ -1473,6 +1719,7 @@ void cayman_fini(struct radeon_device *rdev)
 {
 	r600_blit_fini(rdev);
 	cayman_cp_fini(rdev);
+	cayman_dma_fini(rdev);
 	r600_irq_fini(rdev);
 	if (rdev->flags & RADEON_IS_IGP)
 		si_rlc_fini(rdev);
@@ -1606,3 +1853,26 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 	radeon_ring_write(ring, 0x0);
 }
+
+void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+{
+	struct radeon_ring *ring = &rdev->ring[ridx];
+
+	if (vm == NULL)
+		return;
+
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+
+	/* flush hdp cache */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+	radeon_ring_write(ring, 1);
+
+	/* bits 0-7 are the VM contexts0-7 */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
+	radeon_ring_write(ring, 1 << vm->id);
+}
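One detail worth calling out in cayman_dma_ring_ib_execute above: the
INDIRECT_BUFFER packet is three dwords and must end on an 8-dword boundary,
so the ring is padded with NOPs until (wptr & 7) == 5, since 5 + 3 = 8; the
same arithmetic explains next_rptr's "& 7 != 5" walk plus 3. A standalone
check of the padding arithmetic (pad_to_ib_start is a hypothetical name
used only for this sketch):

#include <assert.h>
#include <stdint.h>

/* Count how many NOP dwords are needed before an INDIRECT_BUFFER
 * packet can be emitted: the 3-dword packet must start at
 * (wptr & 7) == 5 so it ends exactly on an 8-dword boundary. */
static uint32_t pad_to_ib_start(uint32_t wptr)
{
	uint32_t nops = 0;

	while ((wptr & 7) != 5) {
		wptr++;		/* each pad slot holds one NOP dword */
		nops++;
	}
	return nops;
}

int main(void)
{
	assert(pad_to_ib_start(0) == 5);	/* packet occupies dwords 5,6,7 */
	assert(pad_to_ib_start(5) == 0);	/* already aligned */
	assert(pad_to_ib_start(6) == 7);	/* wraps to the next ...101 slot */
	return 0;
}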
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index f5e54a7e2bf2..b93186b8ee4b 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -50,6 +50,24 @@
 #define	VMID(x)					(((x) & 0x7) << 0)
 #define	SRBM_STATUS				0x0E50
 
+#define	SRBM_SOFT_RESET				0x0E60
+#define		SOFT_RESET_BIF			(1 << 1)
+#define		SOFT_RESET_CG			(1 << 2)
+#define		SOFT_RESET_DC			(1 << 5)
+#define		SOFT_RESET_DMA1			(1 << 6)
+#define		SOFT_RESET_GRBM			(1 << 8)
+#define		SOFT_RESET_HDP			(1 << 9)
+#define		SOFT_RESET_IH			(1 << 10)
+#define		SOFT_RESET_MC			(1 << 11)
+#define		SOFT_RESET_RLC			(1 << 13)
+#define		SOFT_RESET_ROM			(1 << 14)
+#define		SOFT_RESET_SEM			(1 << 15)
+#define		SOFT_RESET_VMC			(1 << 17)
+#define		SOFT_RESET_DMA			(1 << 20)
+#define		SOFT_RESET_TST			(1 << 21)
+#define		SOFT_RESET_REGBB		(1 << 22)
+#define		SOFT_RESET_ORB			(1 << 23)
+
 #define VM_CONTEXT0_REQUEST_RESPONSE		0x1470
 #define		REQUEST_TYPE(x)			(((x) & 0xf) << 0)
 #define		RESPONSE_TYPE_MASK		0x000000F0
@@ -599,5 +617,62 @@
 #define	PACKET3_SET_APPEND_CNT			0x75
 #define	PACKET3_ME_WRITE			0x7A
 
+/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
+#define DMA0_REGISTER_OFFSET			0x0 /* not a register */
+#define DMA1_REGISTER_OFFSET			0x800 /* not a register */
+
+#define DMA_RB_CNTL				0xd000
+#	define DMA_RB_ENABLE			(1 << 0)
+#	define DMA_RB_SIZE(x)			((x) << 1) /* log2 */
+#	define DMA_RB_SWAP_ENABLE		(1 << 9) /* 8IN32 */
+#	define DMA_RPTR_WRITEBACK_ENABLE	(1 << 12)
+#	define DMA_RPTR_WRITEBACK_SWAP_ENABLE	(1 << 13) /* 8IN32 */
+#	define DMA_RPTR_WRITEBACK_TIMER(x)	((x) << 16) /* log2 */
+#define DMA_RB_BASE				0xd004
+#define DMA_RB_RPTR				0xd008
+#define DMA_RB_WPTR				0xd00c
+
+#define DMA_RB_RPTR_ADDR_HI			0xd01c
+#define DMA_RB_RPTR_ADDR_LO			0xd020
+
+#define DMA_IB_CNTL				0xd024
+#	define DMA_IB_ENABLE			(1 << 0)
+#	define DMA_IB_SWAP_ENABLE		(1 << 4)
+#	define CMD_VMID_FORCE			(1 << 31)
+#define DMA_IB_RPTR				0xd028
+#define DMA_CNTL				0xd02c
+#	define TRAP_ENABLE			(1 << 0)
+#	define SEM_INCOMPLETE_INT_ENABLE	(1 << 1)
+#	define SEM_WAIT_INT_ENABLE		(1 << 2)
+#	define DATA_SWAP_ENABLE			(1 << 3)
+#	define FENCE_SWAP_ENABLE		(1 << 4)
+#	define CTXEMPTY_INT_ENABLE		(1 << 28)
+#define DMA_STATUS_REG				0xd034
+#	define DMA_IDLE				(1 << 0)
+#define DMA_SEM_INCOMPLETE_TIMER_CNTL		0xd044
+#define DMA_SEM_WAIT_FAIL_TIMER_CNTL		0xd048
+#define DMA_TILING_CONFIG  			0xd0b8
+#define DMA_MODE				0xd0bc
+
+#define DMA_PACKET(cmd, t, s, n)	((((cmd) & 0xF) << 28) |	\
+					 (((t) & 0x1) << 23) |		\
+					 (((s) & 0x1) << 22) |		\
+					 (((n) & 0xFFFFF) << 0))
+
+#define DMA_IB_PACKET(cmd, vmid, n)	((((cmd) & 0xF) << 28) |	\
+					 (((vmid) & 0xF) << 20) |	\
+					 (((n) & 0xFFFFF) << 0))
+
+/* async DMA Packet types */
+#define	DMA_PACKET_WRITE			0x2
+#define	DMA_PACKET_COPY				0x3
+#define	DMA_PACKET_INDIRECT_BUFFER		0x4
+#define	DMA_PACKET_SEMAPHORE			0x5
+#define	DMA_PACKET_FENCE			0x6
+#define	DMA_PACKET_TRAP				0x7
+#define	DMA_PACKET_SRBM_WRITE			0x9
+#define	DMA_PACKET_CONSTANT_FILL		0xd
+#define	DMA_PACKET_NOP				0xf
+
 #endif
 
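The DMA_PACKET_SRBM_WRITE type defined above is what lets the DMA engine
poke an SRBM register from the ring, as cayman_dma_vm_flush does: dword 1
carries a byte-enable mask in bits 19:16 (0xf writes all four bytes) plus
the register's dword offset (byte address >> 2), and dword 2 is the value.
A hedged sketch of that triple as a helper (emit_srbm_write is a
hypothetical name; the real code open-codes the writes):

/* Hedged sketch of the SRBM_WRITE triple from cayman_dma_vm_flush.
 * Dword 1: byte enables (0xf = all four bytes) in bits 19:16, plus
 * the register's dword offset.  Dword 2: the value to write. */
static void emit_srbm_write(struct radeon_ring *ring, u32 reg, u32 val)
{
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (reg >> 2));
	radeon_ring_write(ring, val);
}

/* e.g. the page-table invalidate from cayman_dma_vm_flush:
 *	emit_srbm_write(ring, VM_INVALIDATE_REQUEST, 1 << vm->id);
 */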
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 461bf53709f5..38b6fa374053 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout;
 #define RADEON_BIOS_NUM_SCRATCH			8
 
 /* max number of rings */
-#define RADEON_NUM_RINGS			4
+#define RADEON_NUM_RINGS			5
 
 /* fence seq are set to this number when signaled */
 #define RADEON_FENCE_SIGNALED_SEQ		0LL
@@ -124,6 +124,8 @@ extern int radeon_lockup_timeout;
 
 /* R600+ has an async dma ring */
 #define R600_RING_TYPE_DMA_INDEX		3
+/* cayman adds a second async dma ring */
+#define CAYMAN_RING_TYPE_DMA1_INDEX		4
 
 /* hardcode those limit for now */
 #define RADEON_VA_IB_OFFSET			(1 << 20)
@@ -893,6 +895,7 @@ struct radeon_wb {
 #define RADEON_WB_CP2_RPTR_OFFSET	1536
 #define R600_WB_DMA_RPTR_OFFSET		1792
 #define R600_WB_IH_WPTR_OFFSET		2048
+#define CAYMAN_WB_DMA1_RPTR_OFFSET	2304
 #define R600_WB_EVENT_OFFSET		3072
 
 /**
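Each ring owns a byte offset into the shared writeback page (DMA0 at 1792,
DMA1 at the new 2304 slot); the engine DMA-writes its read pointer there so
the driver can poll it from memory instead of issuing an MMIO read, which
is what DMA_RPTR_WRITEBACK_ENABLE in cayman_dma_resume() turns on. A hedged
sketch of the readback side (the helper name is hypothetical; rdev->wb.wb
is the driver's u32 mapping of that page):

/* Hedged sketch: fetch DMA1's read pointer from the writeback page.
 * The offsets above are byte offsets, hence the /4 for u32 indexing. */
static u32 cayman_dma1_rptr_from_wb(struct radeon_device *rdev)
{
	return le32_to_cpu(rdev->wb.wb[CAYMAN_WB_DMA1_RPTR_OFFSET / 4]);
}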
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 1dd8d927e035..8cf8ae86973c 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1481,6 +1481,26 @@ static struct radeon_asic cayman_asic = {
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
 			.vm_flush = &cayman_vm_flush,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &cayman_dma_vm_flush,
+		},
+		[CAYMAN_RING_TYPE_DMA1_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &cayman_dma_vm_flush,
 		}
 	},
 	.irq = {
@@ -1497,8 +1517,8 @@ static struct radeon_asic cayman_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &evergreen_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
 		.copy = &r600_copy_blit,
 		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 	},
@@ -1586,6 +1606,26 @@ static struct radeon_asic trinity_asic = {
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
 			.vm_flush = &cayman_vm_flush,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &cayman_dma_vm_flush,
+		},
+		[CAYMAN_RING_TYPE_DMA1_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &cayman_dma_vm_flush,
 		}
 	},
 	.irq = {
@@ -1602,8 +1642,8 @@ static struct radeon_asic trinity_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &evergreen_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
 		.copy = &r600_copy_blit,
 		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 	},
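With .copy.dma populated, buffer moves can be routed to the async DMA ring
instead of tying up the GFX ring with blits. A hedged sketch of the
table-driven dispatch this enables (the helper name is hypothetical;
radeon's real entry points are wrappers around these same table fields):

/* Hedged sketch: prefer the async DMA engine when the ASIC table
 * provides one, else fall back to the blitter.  This mirrors what
 * filling in .copy.dma / .dma_ring_index above makes possible. */
static int radeon_copy_prefer_dma(struct radeon_device *rdev,
				  uint64_t src_offset, uint64_t dst_offset,
				  unsigned num_gpu_pages,
				  struct radeon_fence **fence)
{
	if (rdev->asic->copy.dma)	/* &evergreen_copy_dma on cayman/TN */
		return rdev->asic->copy.dma(rdev, src_offset, dst_offset,
					    num_gpu_pages, fence);
	return rdev->asic->copy.blit(rdev, src_offset, dst_offset,
				     num_gpu_pages, fence);
}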
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 7a2705d0a4d9..c2988f706524 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -470,6 +470,10 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
 			 uint64_t addr, unsigned count,
 			 uint32_t incr, uint32_t flags);
 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
+void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+				struct radeon_ib *ib);
+bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
+void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 
 /* DCE6 - SI */
 void dce6_bandwidth_update(struct radeon_device *rdev);