aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon/r600.c
diff options
context:
space:
mode:
author: Alex Deucher <alexander.deucher@amd.com> 2012-05-31 19:00:25 -0400
committer: Dave Airlie <airlied@redhat.com> 2012-06-01 12:00:14 -0400
commit: 416a2bd274566a6f607a271f524b2dc0b84d9106 (patch)
tree: 502720262c07cdb14bc14155bc8295cc20a7d411 /drivers/gpu/drm/radeon/r600.c
parent: 95c4b23ec4e2fa5604df229ddf134e31d7b3b378 (diff)
drm/radeon: fixup tiling group size and backendmap on r6xx-r9xx (v4)
Tiling group size is always 256bits on r6xx/r7xx/r8xx/9xx. Also fix and simplify render backend map. This now properly sets up the backend map on r6xx-9xx which should improve 3D performance.

Vadim benchmarked also:

Some benchmarks on juniper (5750), fullscreen 1920x1080, first result - kernel 3.4.0+ (fb21affa), second - with these patches:

Lightsmark: 91 fps => 123 fps +35%
Doom3: 74 fps => 101 fps +36%

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/r600.c')
-rw-r--r-- drivers/gpu/drm/radeon/r600.c 199
1 files changed, 69 insertions, 130 deletions
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index f388a1d73b63..45cfcea63507 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1376,113 +1376,51 @@ int r600_asic_reset(struct radeon_device *rdev)
1376 return r600_gpu_soft_reset(rdev); 1376 return r600_gpu_soft_reset(rdev);
1377} 1377}
1378 1378
1379static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes, 1379u32 r6xx_remap_render_backend(struct radeon_device *rdev,
1380 u32 num_backends, 1380 u32 tiling_pipe_num,
1381 u32 backend_disable_mask) 1381 u32 max_rb_num,
1382{ 1382 u32 total_max_rb_num,
1383 u32 backend_map = 0; 1383 u32 disabled_rb_mask)
1384 u32 enabled_backends_mask; 1384{
1385 u32 enabled_backends_count; 1385 u32 rendering_pipe_num, rb_num_width, req_rb_num;
1386 u32 cur_pipe; 1386 u32 pipe_rb_ratio, pipe_rb_remain;
1387 u32 swizzle_pipe[R6XX_MAX_PIPES]; 1387 u32 data = 0, mask = 1 << (max_rb_num - 1);
1388 u32 cur_backend; 1388 unsigned i, j;
1389 u32 i; 1389
1390 1390 /* mask out the RBs that don't exist on that asic */
1391 if (num_tile_pipes > R6XX_MAX_PIPES) 1391 disabled_rb_mask |= (0xff << max_rb_num) & 0xff;
1392 num_tile_pipes = R6XX_MAX_PIPES; 1392
1393 if (num_tile_pipes < 1) 1393 rendering_pipe_num = 1 << tiling_pipe_num;
1394 num_tile_pipes = 1; 1394 req_rb_num = total_max_rb_num - r600_count_pipe_bits(disabled_rb_mask);
1395 if (num_backends > R6XX_MAX_BACKENDS) 1395 BUG_ON(rendering_pipe_num < req_rb_num);
1396 num_backends = R6XX_MAX_BACKENDS; 1396
1397 if (num_backends < 1) 1397 pipe_rb_ratio = rendering_pipe_num / req_rb_num;
1398 num_backends = 1; 1398 pipe_rb_remain = rendering_pipe_num - pipe_rb_ratio * req_rb_num;
1399 1399
1400 enabled_backends_mask = 0; 1400 if (rdev->family <= CHIP_RV740) {
1401 enabled_backends_count = 0; 1401 /* r6xx/r7xx */
1402 for (i = 0; i < R6XX_MAX_BACKENDS; ++i) { 1402 rb_num_width = 2;
1403 if (((backend_disable_mask >> i) & 1) == 0) { 1403 } else {
1404 enabled_backends_mask |= (1 << i); 1404 /* eg+ */
1405 ++enabled_backends_count; 1405 rb_num_width = 4;
1406 }
1407 if (enabled_backends_count == num_backends)
1408 break;
1409 }
1410
1411 if (enabled_backends_count == 0) {
1412 enabled_backends_mask = 1;
1413 enabled_backends_count = 1;
1414 }
1415
1416 if (enabled_backends_count != num_backends)
1417 num_backends = enabled_backends_count;
1418
1419 memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES);
1420 switch (num_tile_pipes) {
1421 case 1:
1422 swizzle_pipe[0] = 0;
1423 break;
1424 case 2:
1425 swizzle_pipe[0] = 0;
1426 swizzle_pipe[1] = 1;
1427 break;
1428 case 3:
1429 swizzle_pipe[0] = 0;
1430 swizzle_pipe[1] = 1;
1431 swizzle_pipe[2] = 2;
1432 break;
1433 case 4:
1434 swizzle_pipe[0] = 0;
1435 swizzle_pipe[1] = 1;
1436 swizzle_pipe[2] = 2;
1437 swizzle_pipe[3] = 3;
1438 break;
1439 case 5:
1440 swizzle_pipe[0] = 0;
1441 swizzle_pipe[1] = 1;
1442 swizzle_pipe[2] = 2;
1443 swizzle_pipe[3] = 3;
1444 swizzle_pipe[4] = 4;
1445 break;
1446 case 6:
1447 swizzle_pipe[0] = 0;
1448 swizzle_pipe[1] = 2;
1449 swizzle_pipe[2] = 4;
1450 swizzle_pipe[3] = 5;
1451 swizzle_pipe[4] = 1;
1452 swizzle_pipe[5] = 3;
1453 break;
1454 case 7:
1455 swizzle_pipe[0] = 0;
1456 swizzle_pipe[1] = 2;
1457 swizzle_pipe[2] = 4;
1458 swizzle_pipe[3] = 6;
1459 swizzle_pipe[4] = 1;
1460 swizzle_pipe[5] = 3;
1461 swizzle_pipe[6] = 5;
1462 break;
1463 case 8:
1464 swizzle_pipe[0] = 0;
1465 swizzle_pipe[1] = 2;
1466 swizzle_pipe[2] = 4;
1467 swizzle_pipe[3] = 6;
1468 swizzle_pipe[4] = 1;
1469 swizzle_pipe[5] = 3;
1470 swizzle_pipe[6] = 5;
1471 swizzle_pipe[7] = 7;
1472 break;
1473 } 1406 }
1474 1407
1475 cur_backend = 0; 1408 for (i = 0; i < max_rb_num; i++) {
1476 for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { 1409 if (!(mask & disabled_rb_mask)) {
1477 while (((1 << cur_backend) & enabled_backends_mask) == 0) 1410 for (j = 0; j < pipe_rb_ratio; j++) {
1478 cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS; 1411 data <<= rb_num_width;
1479 1412 data |= max_rb_num - i - 1;
1480 backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); 1413 }
1481 1414 if (pipe_rb_remain) {
1482 cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS; 1415 data <<= rb_num_width;
1416 data |= max_rb_num - i - 1;
1417 pipe_rb_remain--;
1418 }
1419 }
1420 mask >>= 1;
1483 } 1421 }
1484 1422
1485 return backend_map; 1423 return data;
1486} 1424}
1487 1425
1488int r600_count_pipe_bits(uint32_t val) 1426int r600_count_pipe_bits(uint32_t val)
@@ -1500,7 +1438,6 @@ void r600_gpu_init(struct radeon_device *rdev)
1500{ 1438{
1501 u32 tiling_config; 1439 u32 tiling_config;
1502 u32 ramcfg; 1440 u32 ramcfg;
1503 u32 backend_map;
1504 u32 cc_rb_backend_disable; 1441 u32 cc_rb_backend_disable;
1505 u32 cc_gc_shader_pipe_config; 1442 u32 cc_gc_shader_pipe_config;
1506 u32 tmp; 1443 u32 tmp;
@@ -1511,8 +1448,9 @@ void r600_gpu_init(struct radeon_device *rdev)
1511 u32 sq_thread_resource_mgmt = 0; 1448 u32 sq_thread_resource_mgmt = 0;
1512 u32 sq_stack_resource_mgmt_1 = 0; 1449 u32 sq_stack_resource_mgmt_1 = 0;
1513 u32 sq_stack_resource_mgmt_2 = 0; 1450 u32 sq_stack_resource_mgmt_2 = 0;
1451 u32 disabled_rb_mask;
1514 1452
1515 /* FIXME: implement */ 1453 rdev->config.r600.tiling_group_size = 256;
1516 switch (rdev->family) { 1454 switch (rdev->family) {
1517 case CHIP_R600: 1455 case CHIP_R600:
1518 rdev->config.r600.max_pipes = 4; 1456 rdev->config.r600.max_pipes = 4;
@@ -1616,10 +1554,7 @@ void r600_gpu_init(struct radeon_device *rdev)
1616 rdev->config.r600.tiling_nbanks = 4 << ((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); 1554 rdev->config.r600.tiling_nbanks = 4 << ((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT);
1617 tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); 1555 tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT);
1618 tiling_config |= GROUP_SIZE((ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT); 1556 tiling_config |= GROUP_SIZE((ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT);
1619 if ((ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT) 1557
1620 rdev->config.r600.tiling_group_size = 512;
1621 else
1622 rdev->config.r600.tiling_group_size = 256;
1623 tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT; 1558 tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT;
1624 if (tmp > 3) { 1559 if (tmp > 3) {
1625 tiling_config |= ROW_TILING(3); 1560 tiling_config |= ROW_TILING(3);
@@ -1631,32 +1566,36 @@ void r600_gpu_init(struct radeon_device *rdev)
1631 tiling_config |= BANK_SWAPS(1); 1566 tiling_config |= BANK_SWAPS(1);
1632 1567
1633 cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000; 1568 cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000;
1634 cc_rb_backend_disable |= 1569 tmp = R6XX_MAX_BACKENDS -
1635 BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << rdev->config.r600.max_backends) & R6XX_MAX_BACKENDS_MASK); 1570 r600_count_pipe_bits((cc_rb_backend_disable >> 16) & R6XX_MAX_BACKENDS_MASK);
1636 1571 if (tmp < rdev->config.r600.max_backends) {
1637 cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00; 1572 rdev->config.r600.max_backends = tmp;
1638 cc_gc_shader_pipe_config |= 1573 }
1639 INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << rdev->config.r600.max_pipes) & R6XX_MAX_PIPES_MASK); 1574
1640 cc_gc_shader_pipe_config |= 1575 cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0x00ffff00;
1641 INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << rdev->config.r600.max_simds) & R6XX_MAX_SIMDS_MASK); 1576 tmp = R6XX_MAX_PIPES -
1642 1577 r600_count_pipe_bits((cc_gc_shader_pipe_config >> 8) & R6XX_MAX_PIPES_MASK);
1643 backend_map = r600_get_tile_pipe_to_backend_map(rdev->config.r600.max_tile_pipes, 1578 if (tmp < rdev->config.r600.max_pipes) {
1644 (R6XX_MAX_BACKENDS - 1579 rdev->config.r600.max_pipes = tmp;
1645 r600_count_pipe_bits((cc_rb_backend_disable & 1580 }
1646 R6XX_MAX_BACKENDS_MASK) >> 16)), 1581 tmp = R6XX_MAX_SIMDS -
1647 (cc_rb_backend_disable >> 16)); 1582 r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R6XX_MAX_SIMDS_MASK);
1583 if (tmp < rdev->config.r600.max_simds) {
1584 rdev->config.r600.max_simds = tmp;
1585 }
1586
1587 disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R6XX_MAX_BACKENDS_MASK;
1588 tmp = (tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
1589 tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.r600.max_backends,
1590 R6XX_MAX_BACKENDS, disabled_rb_mask);
1591 tiling_config |= tmp << 16;
1592 rdev->config.r600.backend_map = tmp;
1593
1648 rdev->config.r600.tile_config = tiling_config; 1594 rdev->config.r600.tile_config = tiling_config;
1649 rdev->config.r600.backend_map = backend_map;
1650 tiling_config |= BACKEND_MAP(backend_map);
1651 WREG32(GB_TILING_CONFIG, tiling_config); 1595 WREG32(GB_TILING_CONFIG, tiling_config);
1652 WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); 1596 WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff);
1653 WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff); 1597 WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff);
1654 1598
1655 /* Setup pipes */
1656 WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1657 WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
1658 WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
1659
1660 tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); 1599 tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
1661 WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK); 1600 WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK);
1662 WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((tmp * 4) - 2) & VTX_REUSE_DEPTH_MASK); 1601 WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((tmp * 4) - 2) & VTX_REUSE_DEPTH_MASK);