aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon/r100.c
diff options
context:
space:
mode:
authorMarek Olšák <maraeo@gmail.com>2011-02-12 13:21:35 -0500
committerDave Airlie <airlied@redhat.com>2011-02-13 18:23:27 -0500
commit40b4a7599d5555b408e594f4c8dae8015ccaae8f (patch)
tree7241f131fe800f67b49b0d66241609b3e171270d /drivers/gpu/drm/radeon/r100.c
parent01e2f533a234dc62d16c0d3d4fb9d71cf1ce50c3 (diff)
drm/radeon/kms: optimize CS state checking for r100->r500
The colorbuffer, zbuffer, and texture states are checked only once when they get changed. This improves performance in the apps which emit lots of draw packets and few state changes. This drops performance in glxgears by a 1% or so, but glxgears is not a benchmark we care about. The time spent in the kernel when running Torcs dropped from 33% to 23% and the frame rate is higher, which is a good thing. r600 might need something like this as well. Signed-off-by: Marek Olšák <maraeo@gmail.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/r100.c')
-rw-r--r--drivers/gpu/drm/radeon/r100.c40
1 files changed, 36 insertions, 4 deletions
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 5f15820efe12..fdf4bc67ae58 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1427,6 +1427,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1427 } 1427 }
1428 track->zb.robj = reloc->robj; 1428 track->zb.robj = reloc->robj;
1429 track->zb.offset = idx_value; 1429 track->zb.offset = idx_value;
1430 track->zb_dirty = true;
1430 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1431 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1431 break; 1432 break;
1432 case RADEON_RB3D_COLOROFFSET: 1433 case RADEON_RB3D_COLOROFFSET:
@@ -1439,6 +1440,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1439 } 1440 }
1440 track->cb[0].robj = reloc->robj; 1441 track->cb[0].robj = reloc->robj;
1441 track->cb[0].offset = idx_value; 1442 track->cb[0].offset = idx_value;
1443 track->cb_dirty = true;
1442 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1444 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1443 break; 1445 break;
1444 case RADEON_PP_TXOFFSET_0: 1446 case RADEON_PP_TXOFFSET_0:
@@ -1454,6 +1456,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1454 } 1456 }
1455 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1457 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1456 track->textures[i].robj = reloc->robj; 1458 track->textures[i].robj = reloc->robj;
1459 track->tex_dirty = true;
1457 break; 1460 break;
1458 case RADEON_PP_CUBIC_OFFSET_T0_0: 1461 case RADEON_PP_CUBIC_OFFSET_T0_0:
1459 case RADEON_PP_CUBIC_OFFSET_T0_1: 1462 case RADEON_PP_CUBIC_OFFSET_T0_1:
@@ -1471,6 +1474,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1471 track->textures[0].cube_info[i].offset = idx_value; 1474 track->textures[0].cube_info[i].offset = idx_value;
1472 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1475 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1473 track->textures[0].cube_info[i].robj = reloc->robj; 1476 track->textures[0].cube_info[i].robj = reloc->robj;
1477 track->tex_dirty = true;
1474 break; 1478 break;
1475 case RADEON_PP_CUBIC_OFFSET_T1_0: 1479 case RADEON_PP_CUBIC_OFFSET_T1_0:
1476 case RADEON_PP_CUBIC_OFFSET_T1_1: 1480 case RADEON_PP_CUBIC_OFFSET_T1_1:
@@ -1488,6 +1492,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1488 track->textures[1].cube_info[i].offset = idx_value; 1492 track->textures[1].cube_info[i].offset = idx_value;
1489 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1493 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1490 track->textures[1].cube_info[i].robj = reloc->robj; 1494 track->textures[1].cube_info[i].robj = reloc->robj;
1495 track->tex_dirty = true;
1491 break; 1496 break;
1492 case RADEON_PP_CUBIC_OFFSET_T2_0: 1497 case RADEON_PP_CUBIC_OFFSET_T2_0:
1493 case RADEON_PP_CUBIC_OFFSET_T2_1: 1498 case RADEON_PP_CUBIC_OFFSET_T2_1:
@@ -1505,9 +1510,12 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1505 track->textures[2].cube_info[i].offset = idx_value; 1510 track->textures[2].cube_info[i].offset = idx_value;
1506 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1511 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1507 track->textures[2].cube_info[i].robj = reloc->robj; 1512 track->textures[2].cube_info[i].robj = reloc->robj;
1513 track->tex_dirty = true;
1508 break; 1514 break;
1509 case RADEON_RE_WIDTH_HEIGHT: 1515 case RADEON_RE_WIDTH_HEIGHT:
1510 track->maxy = ((idx_value >> 16) & 0x7FF); 1516 track->maxy = ((idx_value >> 16) & 0x7FF);
1517 track->cb_dirty = true;
1518 track->zb_dirty = true;
1511 break; 1519 break;
1512 case RADEON_RB3D_COLORPITCH: 1520 case RADEON_RB3D_COLORPITCH:
1513 r = r100_cs_packet_next_reloc(p, &reloc); 1521 r = r100_cs_packet_next_reloc(p, &reloc);
@@ -1528,9 +1536,11 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1528 ib[idx] = tmp; 1536 ib[idx] = tmp;
1529 1537
1530 track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK; 1538 track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
1539 track->cb_dirty = true;
1531 break; 1540 break;
1532 case RADEON_RB3D_DEPTHPITCH: 1541 case RADEON_RB3D_DEPTHPITCH:
1533 track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK; 1542 track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
1543 track->zb_dirty = true;
1534 break; 1544 break;
1535 case RADEON_RB3D_CNTL: 1545 case RADEON_RB3D_CNTL:
1536 switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { 1546 switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
@@ -1555,6 +1565,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1555 return -EINVAL; 1565 return -EINVAL;
1556 } 1566 }
1557 track->z_enabled = !!(idx_value & RADEON_Z_ENABLE); 1567 track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
1568 track->cb_dirty = true;
1569 track->zb_dirty = true;
1558 break; 1570 break;
1559 case RADEON_RB3D_ZSTENCILCNTL: 1571 case RADEON_RB3D_ZSTENCILCNTL:
1560 switch (idx_value & 0xf) { 1572 switch (idx_value & 0xf) {
@@ -1572,6 +1584,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1572 default: 1584 default:
1573 break; 1585 break;
1574 } 1586 }
1587 track->zb_dirty = true;
1575 break; 1588 break;
1576 case RADEON_RB3D_ZPASS_ADDR: 1589 case RADEON_RB3D_ZPASS_ADDR:
1577 r = r100_cs_packet_next_reloc(p, &reloc); 1590 r = r100_cs_packet_next_reloc(p, &reloc);
@@ -1588,6 +1601,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1588 uint32_t temp = idx_value >> 4; 1601 uint32_t temp = idx_value >> 4;
1589 for (i = 0; i < track->num_texture; i++) 1602 for (i = 0; i < track->num_texture; i++)
1590 track->textures[i].enabled = !!(temp & (1 << i)); 1603 track->textures[i].enabled = !!(temp & (1 << i));
1604 track->tex_dirty = true;
1591 } 1605 }
1592 break; 1606 break;
1593 case RADEON_SE_VF_CNTL: 1607 case RADEON_SE_VF_CNTL:
@@ -1602,12 +1616,14 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1602 i = (reg - RADEON_PP_TEX_SIZE_0) / 8; 1616 i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
1603 track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1; 1617 track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
1604 track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; 1618 track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
1619 track->tex_dirty = true;
1605 break; 1620 break;
1606 case RADEON_PP_TEX_PITCH_0: 1621 case RADEON_PP_TEX_PITCH_0:
1607 case RADEON_PP_TEX_PITCH_1: 1622 case RADEON_PP_TEX_PITCH_1:
1608 case RADEON_PP_TEX_PITCH_2: 1623 case RADEON_PP_TEX_PITCH_2:
1609 i = (reg - RADEON_PP_TEX_PITCH_0) / 8; 1624 i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
1610 track->textures[i].pitch = idx_value + 32; 1625 track->textures[i].pitch = idx_value + 32;
1626 track->tex_dirty = true;
1611 break; 1627 break;
1612 case RADEON_PP_TXFILTER_0: 1628 case RADEON_PP_TXFILTER_0:
1613 case RADEON_PP_TXFILTER_1: 1629 case RADEON_PP_TXFILTER_1:
@@ -1621,6 +1637,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1621 tmp = (idx_value >> 27) & 0x7; 1637 tmp = (idx_value >> 27) & 0x7;
1622 if (tmp == 2 || tmp == 6) 1638 if (tmp == 2 || tmp == 6)
1623 track->textures[i].roundup_h = false; 1639 track->textures[i].roundup_h = false;
1640 track->tex_dirty = true;
1624 break; 1641 break;
1625 case RADEON_PP_TXFORMAT_0: 1642 case RADEON_PP_TXFORMAT_0:
1626 case RADEON_PP_TXFORMAT_1: 1643 case RADEON_PP_TXFORMAT_1:
@@ -1673,6 +1690,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1673 } 1690 }
1674 track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf); 1691 track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
1675 track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf); 1692 track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
1693 track->tex_dirty = true;
1676 break; 1694 break;
1677 case RADEON_PP_CUBIC_FACES_0: 1695 case RADEON_PP_CUBIC_FACES_0:
1678 case RADEON_PP_CUBIC_FACES_1: 1696 case RADEON_PP_CUBIC_FACES_1:
@@ -1683,6 +1701,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1683 track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf); 1701 track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
1684 track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf); 1702 track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
1685 } 1703 }
1704 track->tex_dirty = true;
1686 break; 1705 break;
1687 default: 1706 default:
1688 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", 1707 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
@@ -3318,9 +3337,9 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
3318 unsigned long size; 3337 unsigned long size;
3319 unsigned prim_walk; 3338 unsigned prim_walk;
3320 unsigned nverts; 3339 unsigned nverts;
3321 unsigned num_cb = track->num_cb; 3340 unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
3322 3341
3323 if (!track->zb_cb_clear && !track->color_channel_mask && 3342 if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
3324 !track->blend_read_enable) 3343 !track->blend_read_enable)
3325 num_cb = 0; 3344 num_cb = 0;
3326 3345
@@ -3341,7 +3360,9 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
3341 return -EINVAL; 3360 return -EINVAL;
3342 } 3361 }
3343 } 3362 }
3344 if (track->z_enabled) { 3363 track->cb_dirty = false;
3364
3365 if (track->zb_dirty && track->z_enabled) {
3345 if (track->zb.robj == NULL) { 3366 if (track->zb.robj == NULL) {
3346 DRM_ERROR("[drm] No buffer for z buffer !\n"); 3367 DRM_ERROR("[drm] No buffer for z buffer !\n");
3347 return -EINVAL; 3368 return -EINVAL;
@@ -3358,6 +3379,8 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
3358 return -EINVAL; 3379 return -EINVAL;
3359 } 3380 }
3360 } 3381 }
3382 track->zb_dirty = false;
3383
3361 prim_walk = (track->vap_vf_cntl >> 4) & 0x3; 3384 prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
3362 if (track->vap_vf_cntl & (1 << 14)) { 3385 if (track->vap_vf_cntl & (1 << 14)) {
3363 nverts = track->vap_alt_nverts; 3386 nverts = track->vap_alt_nverts;
@@ -3417,13 +3440,22 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
3417 prim_walk); 3440 prim_walk);
3418 return -EINVAL; 3441 return -EINVAL;
3419 } 3442 }
3420 return r100_cs_track_texture_check(rdev, track); 3443
3444 if (track->tex_dirty) {
3445 track->tex_dirty = false;
3446 return r100_cs_track_texture_check(rdev, track);
3447 }
3448 return 0;
3421} 3449}
3422 3450
3423void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) 3451void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
3424{ 3452{
3425 unsigned i, face; 3453 unsigned i, face;
3426 3454
3455 track->cb_dirty = true;
3456 track->zb_dirty = true;
3457 track->tex_dirty = true;
3458
3427 if (rdev->family < CHIP_R300) { 3459 if (rdev->family < CHIP_R300) {
3428 track->num_cb = 1; 3460 track->num_cb = 1;
3429 if (rdev->family <= CHIP_RS200) 3461 if (rdev->family <= CHIP_RS200)