diff options
author | Alex Deucher <alexander.deucher@amd.com> | 2012-05-31 19:00:25 -0400 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2012-06-01 12:00:14 -0400 |
commit | 416a2bd274566a6f607a271f524b2dc0b84d9106 (patch) | |
tree | 502720262c07cdb14bc14155bc8295cc20a7d411 /drivers/gpu/drm/radeon/r600.c | |
parent | 95c4b23ec4e2fa5604df229ddf134e31d7b3b378 (diff) |
drm/radeon: fixup tiling group size and backendmap on r6xx-r9xx (v4)
Tiling group size is always 256 bits on r6xx/r7xx/r8xx/r9xx. Also fix and
simplify the render backend map. This now properly sets up the backend map
on r6xx-r9xx, which should improve 3D performance.
Vadim also benchmarked this:
Some benchmarks on juniper (5750), fullscreen 1920x1080;
the first result is with kernel 3.4.0+ (fb21affa), the second is with these patches:
Lightsmark: 91 fps => 123 fps +35%
Doom3: 74 fps => 101 fps +36%
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/r600.c')
-rw-r--r-- | drivers/gpu/drm/radeon/r600.c | 199 |
1 files changed, 69 insertions, 130 deletions
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index f388a1d73b63..45cfcea63507 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c | |||
@@ -1376,113 +1376,51 @@ int r600_asic_reset(struct radeon_device *rdev) | |||
1376 | return r600_gpu_soft_reset(rdev); | 1376 | return r600_gpu_soft_reset(rdev); |
1377 | } | 1377 | } |
1378 | 1378 | ||
1379 | static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes, | 1379 | u32 r6xx_remap_render_backend(struct radeon_device *rdev, |
1380 | u32 num_backends, | 1380 | u32 tiling_pipe_num, |
1381 | u32 backend_disable_mask) | 1381 | u32 max_rb_num, |
1382 | { | 1382 | u32 total_max_rb_num, |
1383 | u32 backend_map = 0; | 1383 | u32 disabled_rb_mask) |
1384 | u32 enabled_backends_mask; | 1384 | { |
1385 | u32 enabled_backends_count; | 1385 | u32 rendering_pipe_num, rb_num_width, req_rb_num; |
1386 | u32 cur_pipe; | 1386 | u32 pipe_rb_ratio, pipe_rb_remain; |
1387 | u32 swizzle_pipe[R6XX_MAX_PIPES]; | 1387 | u32 data = 0, mask = 1 << (max_rb_num - 1); |
1388 | u32 cur_backend; | 1388 | unsigned i, j; |
1389 | u32 i; | 1389 | |
1390 | 1390 | /* mask out the RBs that don't exist on that asic */ | |
1391 | if (num_tile_pipes > R6XX_MAX_PIPES) | 1391 | disabled_rb_mask |= (0xff << max_rb_num) & 0xff; |
1392 | num_tile_pipes = R6XX_MAX_PIPES; | 1392 | |
1393 | if (num_tile_pipes < 1) | 1393 | rendering_pipe_num = 1 << tiling_pipe_num; |
1394 | num_tile_pipes = 1; | 1394 | req_rb_num = total_max_rb_num - r600_count_pipe_bits(disabled_rb_mask); |
1395 | if (num_backends > R6XX_MAX_BACKENDS) | 1395 | BUG_ON(rendering_pipe_num < req_rb_num); |
1396 | num_backends = R6XX_MAX_BACKENDS; | 1396 | |
1397 | if (num_backends < 1) | 1397 | pipe_rb_ratio = rendering_pipe_num / req_rb_num; |
1398 | num_backends = 1; | 1398 | pipe_rb_remain = rendering_pipe_num - pipe_rb_ratio * req_rb_num; |
1399 | 1399 | ||
1400 | enabled_backends_mask = 0; | 1400 | if (rdev->family <= CHIP_RV740) { |
1401 | enabled_backends_count = 0; | 1401 | /* r6xx/r7xx */ |
1402 | for (i = 0; i < R6XX_MAX_BACKENDS; ++i) { | 1402 | rb_num_width = 2; |
1403 | if (((backend_disable_mask >> i) & 1) == 0) { | 1403 | } else { |
1404 | enabled_backends_mask |= (1 << i); | 1404 | /* eg+ */ |
1405 | ++enabled_backends_count; | 1405 | rb_num_width = 4; |
1406 | } | ||
1407 | if (enabled_backends_count == num_backends) | ||
1408 | break; | ||
1409 | } | ||
1410 | |||
1411 | if (enabled_backends_count == 0) { | ||
1412 | enabled_backends_mask = 1; | ||
1413 | enabled_backends_count = 1; | ||
1414 | } | ||
1415 | |||
1416 | if (enabled_backends_count != num_backends) | ||
1417 | num_backends = enabled_backends_count; | ||
1418 | |||
1419 | memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES); | ||
1420 | switch (num_tile_pipes) { | ||
1421 | case 1: | ||
1422 | swizzle_pipe[0] = 0; | ||
1423 | break; | ||
1424 | case 2: | ||
1425 | swizzle_pipe[0] = 0; | ||
1426 | swizzle_pipe[1] = 1; | ||
1427 | break; | ||
1428 | case 3: | ||
1429 | swizzle_pipe[0] = 0; | ||
1430 | swizzle_pipe[1] = 1; | ||
1431 | swizzle_pipe[2] = 2; | ||
1432 | break; | ||
1433 | case 4: | ||
1434 | swizzle_pipe[0] = 0; | ||
1435 | swizzle_pipe[1] = 1; | ||
1436 | swizzle_pipe[2] = 2; | ||
1437 | swizzle_pipe[3] = 3; | ||
1438 | break; | ||
1439 | case 5: | ||
1440 | swizzle_pipe[0] = 0; | ||
1441 | swizzle_pipe[1] = 1; | ||
1442 | swizzle_pipe[2] = 2; | ||
1443 | swizzle_pipe[3] = 3; | ||
1444 | swizzle_pipe[4] = 4; | ||
1445 | break; | ||
1446 | case 6: | ||
1447 | swizzle_pipe[0] = 0; | ||
1448 | swizzle_pipe[1] = 2; | ||
1449 | swizzle_pipe[2] = 4; | ||
1450 | swizzle_pipe[3] = 5; | ||
1451 | swizzle_pipe[4] = 1; | ||
1452 | swizzle_pipe[5] = 3; | ||
1453 | break; | ||
1454 | case 7: | ||
1455 | swizzle_pipe[0] = 0; | ||
1456 | swizzle_pipe[1] = 2; | ||
1457 | swizzle_pipe[2] = 4; | ||
1458 | swizzle_pipe[3] = 6; | ||
1459 | swizzle_pipe[4] = 1; | ||
1460 | swizzle_pipe[5] = 3; | ||
1461 | swizzle_pipe[6] = 5; | ||
1462 | break; | ||
1463 | case 8: | ||
1464 | swizzle_pipe[0] = 0; | ||
1465 | swizzle_pipe[1] = 2; | ||
1466 | swizzle_pipe[2] = 4; | ||
1467 | swizzle_pipe[3] = 6; | ||
1468 | swizzle_pipe[4] = 1; | ||
1469 | swizzle_pipe[5] = 3; | ||
1470 | swizzle_pipe[6] = 5; | ||
1471 | swizzle_pipe[7] = 7; | ||
1472 | break; | ||
1473 | } | 1406 | } |
1474 | 1407 | ||
1475 | cur_backend = 0; | 1408 | for (i = 0; i < max_rb_num; i++) { |
1476 | for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { | 1409 | if (!(mask & disabled_rb_mask)) { |
1477 | while (((1 << cur_backend) & enabled_backends_mask) == 0) | 1410 | for (j = 0; j < pipe_rb_ratio; j++) { |
1478 | cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS; | 1411 | data <<= rb_num_width; |
1479 | 1412 | data |= max_rb_num - i - 1; | |
1480 | backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); | 1413 | } |
1481 | 1414 | if (pipe_rb_remain) { | |
1482 | cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS; | 1415 | data <<= rb_num_width; |
1416 | data |= max_rb_num - i - 1; | ||
1417 | pipe_rb_remain--; | ||
1418 | } | ||
1419 | } | ||
1420 | mask >>= 1; | ||
1483 | } | 1421 | } |
1484 | 1422 | ||
1485 | return backend_map; | 1423 | return data; |
1486 | } | 1424 | } |
1487 | 1425 | ||
1488 | int r600_count_pipe_bits(uint32_t val) | 1426 | int r600_count_pipe_bits(uint32_t val) |
@@ -1500,7 +1438,6 @@ void r600_gpu_init(struct radeon_device *rdev) | |||
1500 | { | 1438 | { |
1501 | u32 tiling_config; | 1439 | u32 tiling_config; |
1502 | u32 ramcfg; | 1440 | u32 ramcfg; |
1503 | u32 backend_map; | ||
1504 | u32 cc_rb_backend_disable; | 1441 | u32 cc_rb_backend_disable; |
1505 | u32 cc_gc_shader_pipe_config; | 1442 | u32 cc_gc_shader_pipe_config; |
1506 | u32 tmp; | 1443 | u32 tmp; |
@@ -1511,8 +1448,9 @@ void r600_gpu_init(struct radeon_device *rdev) | |||
1511 | u32 sq_thread_resource_mgmt = 0; | 1448 | u32 sq_thread_resource_mgmt = 0; |
1512 | u32 sq_stack_resource_mgmt_1 = 0; | 1449 | u32 sq_stack_resource_mgmt_1 = 0; |
1513 | u32 sq_stack_resource_mgmt_2 = 0; | 1450 | u32 sq_stack_resource_mgmt_2 = 0; |
1451 | u32 disabled_rb_mask; | ||
1514 | 1452 | ||
1515 | /* FIXME: implement */ | 1453 | rdev->config.r600.tiling_group_size = 256; |
1516 | switch (rdev->family) { | 1454 | switch (rdev->family) { |
1517 | case CHIP_R600: | 1455 | case CHIP_R600: |
1518 | rdev->config.r600.max_pipes = 4; | 1456 | rdev->config.r600.max_pipes = 4; |
@@ -1616,10 +1554,7 @@ void r600_gpu_init(struct radeon_device *rdev) | |||
1616 | rdev->config.r600.tiling_nbanks = 4 << ((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); | 1554 | rdev->config.r600.tiling_nbanks = 4 << ((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); |
1617 | tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); | 1555 | tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); |
1618 | tiling_config |= GROUP_SIZE((ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT); | 1556 | tiling_config |= GROUP_SIZE((ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT); |
1619 | if ((ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT) | 1557 | |
1620 | rdev->config.r600.tiling_group_size = 512; | ||
1621 | else | ||
1622 | rdev->config.r600.tiling_group_size = 256; | ||
1623 | tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT; | 1558 | tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT; |
1624 | if (tmp > 3) { | 1559 | if (tmp > 3) { |
1625 | tiling_config |= ROW_TILING(3); | 1560 | tiling_config |= ROW_TILING(3); |
@@ -1631,32 +1566,36 @@ void r600_gpu_init(struct radeon_device *rdev) | |||
1631 | tiling_config |= BANK_SWAPS(1); | 1566 | tiling_config |= BANK_SWAPS(1); |
1632 | 1567 | ||
1633 | cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000; | 1568 | cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000; |
1634 | cc_rb_backend_disable |= | 1569 | tmp = R6XX_MAX_BACKENDS - |
1635 | BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << rdev->config.r600.max_backends) & R6XX_MAX_BACKENDS_MASK); | 1570 | r600_count_pipe_bits((cc_rb_backend_disable >> 16) & R6XX_MAX_BACKENDS_MASK); |
1636 | 1571 | if (tmp < rdev->config.r600.max_backends) { | |
1637 | cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00; | 1572 | rdev->config.r600.max_backends = tmp; |
1638 | cc_gc_shader_pipe_config |= | 1573 | } |
1639 | INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << rdev->config.r600.max_pipes) & R6XX_MAX_PIPES_MASK); | 1574 | |
1640 | cc_gc_shader_pipe_config |= | 1575 | cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0x00ffff00; |
1641 | INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << rdev->config.r600.max_simds) & R6XX_MAX_SIMDS_MASK); | 1576 | tmp = R6XX_MAX_PIPES - |
1642 | 1577 | r600_count_pipe_bits((cc_gc_shader_pipe_config >> 8) & R6XX_MAX_PIPES_MASK); | |
1643 | backend_map = r600_get_tile_pipe_to_backend_map(rdev->config.r600.max_tile_pipes, | 1578 | if (tmp < rdev->config.r600.max_pipes) { |
1644 | (R6XX_MAX_BACKENDS - | 1579 | rdev->config.r600.max_pipes = tmp; |
1645 | r600_count_pipe_bits((cc_rb_backend_disable & | 1580 | } |
1646 | R6XX_MAX_BACKENDS_MASK) >> 16)), | 1581 | tmp = R6XX_MAX_SIMDS - |
1647 | (cc_rb_backend_disable >> 16)); | 1582 | r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R6XX_MAX_SIMDS_MASK); |
1583 | if (tmp < rdev->config.r600.max_simds) { | ||
1584 | rdev->config.r600.max_simds = tmp; | ||
1585 | } | ||
1586 | |||
1587 | disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R6XX_MAX_BACKENDS_MASK; | ||
1588 | tmp = (tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT; | ||
1589 | tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.r600.max_backends, | ||
1590 | R6XX_MAX_BACKENDS, disabled_rb_mask); | ||
1591 | tiling_config |= tmp << 16; | ||
1592 | rdev->config.r600.backend_map = tmp; | ||
1593 | |||
1648 | rdev->config.r600.tile_config = tiling_config; | 1594 | rdev->config.r600.tile_config = tiling_config; |
1649 | rdev->config.r600.backend_map = backend_map; | ||
1650 | tiling_config |= BACKEND_MAP(backend_map); | ||
1651 | WREG32(GB_TILING_CONFIG, tiling_config); | 1595 | WREG32(GB_TILING_CONFIG, tiling_config); |
1652 | WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); | 1596 | WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); |
1653 | WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff); | 1597 | WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff); |
1654 | 1598 | ||
1655 | /* Setup pipes */ | ||
1656 | WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); | ||
1657 | WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); | ||
1658 | WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); | ||
1659 | |||
1660 | tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); | 1599 | tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); |
1661 | WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK); | 1600 | WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK); |
1662 | WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((tmp * 4) - 2) & VTX_REUSE_DEPTH_MASK); | 1601 | WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((tmp * 4) - 2) & VTX_REUSE_DEPTH_MASK); |