aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorMarek Olšák <maraeo@gmail.com>2011-02-12 13:21:35 -0500
committerDave Airlie <airlied@redhat.com>2011-02-13 18:23:27 -0500
commit40b4a7599d5555b408e594f4c8dae8015ccaae8f (patch)
tree7241f131fe800f67b49b0d66241609b3e171270d /drivers
parent01e2f533a234dc62d16c0d3d4fb9d71cf1ce50c3 (diff)
drm/radeon/kms: optimize CS state checking for r100->r500
The colorbuffer, zbuffer, and texture states are checked only once when they get changed. This improves performance in the apps which emit lots of draw packets and few state changes. This drops performance in glxgears by a 1% or so, but glxgears is not a benchmark we care about. The time spent in the kernel when running Torcs dropped from 33% to 23% and the frame rate is higher, which is a good thing. r600 might need something like this as well. Signed-off-by: Marek Olšák <maraeo@gmail.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/drm/radeon/r100.c40
-rw-r--r--drivers/gpu/drm/radeon/r100_track.h11
-rw-r--r--drivers/gpu/drm/radeon/r200.c18
-rw-r--r--drivers/gpu/drm/radeon/r300.c20
4 files changed, 77 insertions, 12 deletions
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 5f15820efe1..fdf4bc67ae5 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1427,6 +1427,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1427 } 1427 }
1428 track->zb.robj = reloc->robj; 1428 track->zb.robj = reloc->robj;
1429 track->zb.offset = idx_value; 1429 track->zb.offset = idx_value;
1430 track->zb_dirty = true;
1430 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1431 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1431 break; 1432 break;
1432 case RADEON_RB3D_COLOROFFSET: 1433 case RADEON_RB3D_COLOROFFSET:
@@ -1439,6 +1440,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1439 } 1440 }
1440 track->cb[0].robj = reloc->robj; 1441 track->cb[0].robj = reloc->robj;
1441 track->cb[0].offset = idx_value; 1442 track->cb[0].offset = idx_value;
1443 track->cb_dirty = true;
1442 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1444 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1443 break; 1445 break;
1444 case RADEON_PP_TXOFFSET_0: 1446 case RADEON_PP_TXOFFSET_0:
@@ -1454,6 +1456,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1454 } 1456 }
1455 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1457 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1456 track->textures[i].robj = reloc->robj; 1458 track->textures[i].robj = reloc->robj;
1459 track->tex_dirty = true;
1457 break; 1460 break;
1458 case RADEON_PP_CUBIC_OFFSET_T0_0: 1461 case RADEON_PP_CUBIC_OFFSET_T0_0:
1459 case RADEON_PP_CUBIC_OFFSET_T0_1: 1462 case RADEON_PP_CUBIC_OFFSET_T0_1:
@@ -1471,6 +1474,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1471 track->textures[0].cube_info[i].offset = idx_value; 1474 track->textures[0].cube_info[i].offset = idx_value;
1472 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1475 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1473 track->textures[0].cube_info[i].robj = reloc->robj; 1476 track->textures[0].cube_info[i].robj = reloc->robj;
1477 track->tex_dirty = true;
1474 break; 1478 break;
1475 case RADEON_PP_CUBIC_OFFSET_T1_0: 1479 case RADEON_PP_CUBIC_OFFSET_T1_0:
1476 case RADEON_PP_CUBIC_OFFSET_T1_1: 1480 case RADEON_PP_CUBIC_OFFSET_T1_1:
@@ -1488,6 +1492,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1488 track->textures[1].cube_info[i].offset = idx_value; 1492 track->textures[1].cube_info[i].offset = idx_value;
1489 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1493 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1490 track->textures[1].cube_info[i].robj = reloc->robj; 1494 track->textures[1].cube_info[i].robj = reloc->robj;
1495 track->tex_dirty = true;
1491 break; 1496 break;
1492 case RADEON_PP_CUBIC_OFFSET_T2_0: 1497 case RADEON_PP_CUBIC_OFFSET_T2_0:
1493 case RADEON_PP_CUBIC_OFFSET_T2_1: 1498 case RADEON_PP_CUBIC_OFFSET_T2_1:
@@ -1505,9 +1510,12 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1505 track->textures[2].cube_info[i].offset = idx_value; 1510 track->textures[2].cube_info[i].offset = idx_value;
1506 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1511 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1507 track->textures[2].cube_info[i].robj = reloc->robj; 1512 track->textures[2].cube_info[i].robj = reloc->robj;
1513 track->tex_dirty = true;
1508 break; 1514 break;
1509 case RADEON_RE_WIDTH_HEIGHT: 1515 case RADEON_RE_WIDTH_HEIGHT:
1510 track->maxy = ((idx_value >> 16) & 0x7FF); 1516 track->maxy = ((idx_value >> 16) & 0x7FF);
1517 track->cb_dirty = true;
1518 track->zb_dirty = true;
1511 break; 1519 break;
1512 case RADEON_RB3D_COLORPITCH: 1520 case RADEON_RB3D_COLORPITCH:
1513 r = r100_cs_packet_next_reloc(p, &reloc); 1521 r = r100_cs_packet_next_reloc(p, &reloc);
@@ -1528,9 +1536,11 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1528 ib[idx] = tmp; 1536 ib[idx] = tmp;
1529 1537
1530 track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK; 1538 track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
1539 track->cb_dirty = true;
1531 break; 1540 break;
1532 case RADEON_RB3D_DEPTHPITCH: 1541 case RADEON_RB3D_DEPTHPITCH:
1533 track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK; 1542 track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
1543 track->zb_dirty = true;
1534 break; 1544 break;
1535 case RADEON_RB3D_CNTL: 1545 case RADEON_RB3D_CNTL:
1536 switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { 1546 switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
@@ -1555,6 +1565,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1555 return -EINVAL; 1565 return -EINVAL;
1556 } 1566 }
1557 track->z_enabled = !!(idx_value & RADEON_Z_ENABLE); 1567 track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
1568 track->cb_dirty = true;
1569 track->zb_dirty = true;
1558 break; 1570 break;
1559 case RADEON_RB3D_ZSTENCILCNTL: 1571 case RADEON_RB3D_ZSTENCILCNTL:
1560 switch (idx_value & 0xf) { 1572 switch (idx_value & 0xf) {
@@ -1572,6 +1584,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1572 default: 1584 default:
1573 break; 1585 break;
1574 } 1586 }
1587 track->zb_dirty = true;
1575 break; 1588 break;
1576 case RADEON_RB3D_ZPASS_ADDR: 1589 case RADEON_RB3D_ZPASS_ADDR:
1577 r = r100_cs_packet_next_reloc(p, &reloc); 1590 r = r100_cs_packet_next_reloc(p, &reloc);
@@ -1588,6 +1601,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1588 uint32_t temp = idx_value >> 4; 1601 uint32_t temp = idx_value >> 4;
1589 for (i = 0; i < track->num_texture; i++) 1602 for (i = 0; i < track->num_texture; i++)
1590 track->textures[i].enabled = !!(temp & (1 << i)); 1603 track->textures[i].enabled = !!(temp & (1 << i));
1604 track->tex_dirty = true;
1591 } 1605 }
1592 break; 1606 break;
1593 case RADEON_SE_VF_CNTL: 1607 case RADEON_SE_VF_CNTL:
@@ -1602,12 +1616,14 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1602 i = (reg - RADEON_PP_TEX_SIZE_0) / 8; 1616 i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
1603 track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1; 1617 track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
1604 track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; 1618 track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
1619 track->tex_dirty = true;
1605 break; 1620 break;
1606 case RADEON_PP_TEX_PITCH_0: 1621 case RADEON_PP_TEX_PITCH_0:
1607 case RADEON_PP_TEX_PITCH_1: 1622 case RADEON_PP_TEX_PITCH_1:
1608 case RADEON_PP_TEX_PITCH_2: 1623 case RADEON_PP_TEX_PITCH_2:
1609 i = (reg - RADEON_PP_TEX_PITCH_0) / 8; 1624 i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
1610 track->textures[i].pitch = idx_value + 32; 1625 track->textures[i].pitch = idx_value + 32;
1626 track->tex_dirty = true;
1611 break; 1627 break;
1612 case RADEON_PP_TXFILTER_0: 1628 case RADEON_PP_TXFILTER_0:
1613 case RADEON_PP_TXFILTER_1: 1629 case RADEON_PP_TXFILTER_1:
@@ -1621,6 +1637,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1621 tmp = (idx_value >> 27) & 0x7; 1637 tmp = (idx_value >> 27) & 0x7;
1622 if (tmp == 2 || tmp == 6) 1638 if (tmp == 2 || tmp == 6)
1623 track->textures[i].roundup_h = false; 1639 track->textures[i].roundup_h = false;
1640 track->tex_dirty = true;
1624 break; 1641 break;
1625 case RADEON_PP_TXFORMAT_0: 1642 case RADEON_PP_TXFORMAT_0:
1626 case RADEON_PP_TXFORMAT_1: 1643 case RADEON_PP_TXFORMAT_1:
@@ -1673,6 +1690,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1673 } 1690 }
1674 track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf); 1691 track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
1675 track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf); 1692 track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
1693 track->tex_dirty = true;
1676 break; 1694 break;
1677 case RADEON_PP_CUBIC_FACES_0: 1695 case RADEON_PP_CUBIC_FACES_0:
1678 case RADEON_PP_CUBIC_FACES_1: 1696 case RADEON_PP_CUBIC_FACES_1:
@@ -1683,6 +1701,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
1683 track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf); 1701 track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
1684 track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf); 1702 track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
1685 } 1703 }
1704 track->tex_dirty = true;
1686 break; 1705 break;
1687 default: 1706 default:
1688 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", 1707 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
@@ -3318,9 +3337,9 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
3318 unsigned long size; 3337 unsigned long size;
3319 unsigned prim_walk; 3338 unsigned prim_walk;
3320 unsigned nverts; 3339 unsigned nverts;
3321 unsigned num_cb = track->num_cb; 3340 unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
3322 3341
3323 if (!track->zb_cb_clear && !track->color_channel_mask && 3342 if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
3324 !track->blend_read_enable) 3343 !track->blend_read_enable)
3325 num_cb = 0; 3344 num_cb = 0;
3326 3345
@@ -3341,7 +3360,9 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
3341 return -EINVAL; 3360 return -EINVAL;
3342 } 3361 }
3343 } 3362 }
3344 if (track->z_enabled) { 3363 track->cb_dirty = false;
3364
3365 if (track->zb_dirty && track->z_enabled) {
3345 if (track->zb.robj == NULL) { 3366 if (track->zb.robj == NULL) {
3346 DRM_ERROR("[drm] No buffer for z buffer !\n"); 3367 DRM_ERROR("[drm] No buffer for z buffer !\n");
3347 return -EINVAL; 3368 return -EINVAL;
@@ -3358,6 +3379,8 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
3358 return -EINVAL; 3379 return -EINVAL;
3359 } 3380 }
3360 } 3381 }
3382 track->zb_dirty = false;
3383
3361 prim_walk = (track->vap_vf_cntl >> 4) & 0x3; 3384 prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
3362 if (track->vap_vf_cntl & (1 << 14)) { 3385 if (track->vap_vf_cntl & (1 << 14)) {
3363 nverts = track->vap_alt_nverts; 3386 nverts = track->vap_alt_nverts;
@@ -3417,13 +3440,22 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
3417 prim_walk); 3440 prim_walk);
3418 return -EINVAL; 3441 return -EINVAL;
3419 } 3442 }
3420 return r100_cs_track_texture_check(rdev, track); 3443
3444 if (track->tex_dirty) {
3445 track->tex_dirty = false;
3446 return r100_cs_track_texture_check(rdev, track);
3447 }
3448 return 0;
3421} 3449}
3422 3450
3423void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) 3451void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
3424{ 3452{
3425 unsigned i, face; 3453 unsigned i, face;
3426 3454
3455 track->cb_dirty = true;
3456 track->zb_dirty = true;
3457 track->tex_dirty = true;
3458
3427 if (rdev->family < CHIP_R300) { 3459 if (rdev->family < CHIP_R300) {
3428 track->num_cb = 1; 3460 track->num_cb = 1;
3429 if (rdev->family <= CHIP_RS200) 3461 if (rdev->family <= CHIP_RS200)
diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h
index af65600e656..ee85c4a1fc0 100644
--- a/drivers/gpu/drm/radeon/r100_track.h
+++ b/drivers/gpu/drm/radeon/r100_track.h
@@ -52,14 +52,7 @@ struct r100_cs_track_texture {
52 unsigned compress_format; 52 unsigned compress_format;
53}; 53};
54 54
55struct r100_cs_track_limits {
56 unsigned num_cb;
57 unsigned num_texture;
58 unsigned max_levels;
59};
60
61struct r100_cs_track { 55struct r100_cs_track {
62 struct radeon_device *rdev;
63 unsigned num_cb; 56 unsigned num_cb;
64 unsigned num_texture; 57 unsigned num_texture;
65 unsigned maxy; 58 unsigned maxy;
@@ -78,6 +71,10 @@ struct r100_cs_track {
78 bool separate_cube; 71 bool separate_cube;
79 bool zb_cb_clear; 72 bool zb_cb_clear;
80 bool blend_read_enable; 73 bool blend_read_enable;
74
75 bool cb_dirty;
76 bool zb_dirty;
77 bool tex_dirty;
81}; 78};
82 79
83int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track); 80int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track);
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c
index d2408c39561..f2405830041 100644
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -184,6 +184,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
184 } 184 }
185 track->zb.robj = reloc->robj; 185 track->zb.robj = reloc->robj;
186 track->zb.offset = idx_value; 186 track->zb.offset = idx_value;
187 track->zb_dirty = true;
187 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 188 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
188 break; 189 break;
189 case RADEON_RB3D_COLOROFFSET: 190 case RADEON_RB3D_COLOROFFSET:
@@ -196,6 +197,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
196 } 197 }
197 track->cb[0].robj = reloc->robj; 198 track->cb[0].robj = reloc->robj;
198 track->cb[0].offset = idx_value; 199 track->cb[0].offset = idx_value;
200 track->cb_dirty = true;
199 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 201 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
200 break; 202 break;
201 case R200_PP_TXOFFSET_0: 203 case R200_PP_TXOFFSET_0:
@@ -214,6 +216,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
214 } 216 }
215 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 217 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
216 track->textures[i].robj = reloc->robj; 218 track->textures[i].robj = reloc->robj;
219 track->tex_dirty = true;
217 break; 220 break;
218 case R200_PP_CUBIC_OFFSET_F1_0: 221 case R200_PP_CUBIC_OFFSET_F1_0:
219 case R200_PP_CUBIC_OFFSET_F2_0: 222 case R200_PP_CUBIC_OFFSET_F2_0:
@@ -257,9 +260,12 @@ int r200_packet0_check(struct radeon_cs_parser *p,
257 track->textures[i].cube_info[face - 1].offset = idx_value; 260 track->textures[i].cube_info[face - 1].offset = idx_value;
258 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 261 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
259 track->textures[i].cube_info[face - 1].robj = reloc->robj; 262 track->textures[i].cube_info[face - 1].robj = reloc->robj;
263 track->tex_dirty = true;
260 break; 264 break;
261 case RADEON_RE_WIDTH_HEIGHT: 265 case RADEON_RE_WIDTH_HEIGHT:
262 track->maxy = ((idx_value >> 16) & 0x7FF); 266 track->maxy = ((idx_value >> 16) & 0x7FF);
267 track->cb_dirty = true;
268 track->zb_dirty = true;
263 break; 269 break;
264 case RADEON_RB3D_COLORPITCH: 270 case RADEON_RB3D_COLORPITCH:
265 r = r100_cs_packet_next_reloc(p, &reloc); 271 r = r100_cs_packet_next_reloc(p, &reloc);
@@ -280,9 +286,11 @@ int r200_packet0_check(struct radeon_cs_parser *p,
280 ib[idx] = tmp; 286 ib[idx] = tmp;
281 287
282 track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK; 288 track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
289 track->cb_dirty = true;
283 break; 290 break;
284 case RADEON_RB3D_DEPTHPITCH: 291 case RADEON_RB3D_DEPTHPITCH:
285 track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK; 292 track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
293 track->zb_dirty = true;
286 break; 294 break;
287 case RADEON_RB3D_CNTL: 295 case RADEON_RB3D_CNTL:
288 switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { 296 switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
@@ -312,6 +320,8 @@ int r200_packet0_check(struct radeon_cs_parser *p,
312 } 320 }
313 321
314 track->z_enabled = !!(idx_value & RADEON_Z_ENABLE); 322 track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
323 track->cb_dirty = true;
324 track->zb_dirty = true;
315 break; 325 break;
316 case RADEON_RB3D_ZSTENCILCNTL: 326 case RADEON_RB3D_ZSTENCILCNTL:
317 switch (idx_value & 0xf) { 327 switch (idx_value & 0xf) {
@@ -329,6 +339,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
329 default: 339 default:
330 break; 340 break;
331 } 341 }
342 track->zb_dirty = true;
332 break; 343 break;
333 case RADEON_RB3D_ZPASS_ADDR: 344 case RADEON_RB3D_ZPASS_ADDR:
334 r = r100_cs_packet_next_reloc(p, &reloc); 345 r = r100_cs_packet_next_reloc(p, &reloc);
@@ -345,6 +356,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
345 uint32_t temp = idx_value >> 4; 356 uint32_t temp = idx_value >> 4;
346 for (i = 0; i < track->num_texture; i++) 357 for (i = 0; i < track->num_texture; i++)
347 track->textures[i].enabled = !!(temp & (1 << i)); 358 track->textures[i].enabled = !!(temp & (1 << i));
359 track->tex_dirty = true;
348 } 360 }
349 break; 361 break;
350 case RADEON_SE_VF_CNTL: 362 case RADEON_SE_VF_CNTL:
@@ -369,6 +381,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
369 i = (reg - R200_PP_TXSIZE_0) / 32; 381 i = (reg - R200_PP_TXSIZE_0) / 32;
370 track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1; 382 track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
371 track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; 383 track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
384 track->tex_dirty = true;
372 break; 385 break;
373 case R200_PP_TXPITCH_0: 386 case R200_PP_TXPITCH_0:
374 case R200_PP_TXPITCH_1: 387 case R200_PP_TXPITCH_1:
@@ -378,6 +391,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
378 case R200_PP_TXPITCH_5: 391 case R200_PP_TXPITCH_5:
379 i = (reg - R200_PP_TXPITCH_0) / 32; 392 i = (reg - R200_PP_TXPITCH_0) / 32;
380 track->textures[i].pitch = idx_value + 32; 393 track->textures[i].pitch = idx_value + 32;
394 track->tex_dirty = true;
381 break; 395 break;
382 case R200_PP_TXFILTER_0: 396 case R200_PP_TXFILTER_0:
383 case R200_PP_TXFILTER_1: 397 case R200_PP_TXFILTER_1:
@@ -394,6 +408,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
394 tmp = (idx_value >> 27) & 0x7; 408 tmp = (idx_value >> 27) & 0x7;
395 if (tmp == 2 || tmp == 6) 409 if (tmp == 2 || tmp == 6)
396 track->textures[i].roundup_h = false; 410 track->textures[i].roundup_h = false;
411 track->tex_dirty = true;
397 break; 412 break;
398 case R200_PP_TXMULTI_CTL_0: 413 case R200_PP_TXMULTI_CTL_0:
399 case R200_PP_TXMULTI_CTL_1: 414 case R200_PP_TXMULTI_CTL_1:
@@ -432,6 +447,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
432 track->textures[i].tex_coord_type = 1; 447 track->textures[i].tex_coord_type = 1;
433 break; 448 break;
434 } 449 }
450 track->tex_dirty = true;
435 break; 451 break;
436 case R200_PP_TXFORMAT_0: 452 case R200_PP_TXFORMAT_0:
437 case R200_PP_TXFORMAT_1: 453 case R200_PP_TXFORMAT_1:
@@ -488,6 +504,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
488 } 504 }
489 track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf); 505 track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
490 track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf); 506 track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
507 track->tex_dirty = true;
491 break; 508 break;
492 case R200_PP_CUBIC_FACES_0: 509 case R200_PP_CUBIC_FACES_0:
493 case R200_PP_CUBIC_FACES_1: 510 case R200_PP_CUBIC_FACES_1:
@@ -501,6 +518,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
501 track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf); 518 track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
502 track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf); 519 track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
503 } 520 }
521 track->tex_dirty = true;
504 break; 522 break;
505 default: 523 default:
506 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", 524 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 55fe5ba7def..15f94648f27 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -667,6 +667,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
667 } 667 }
668 track->cb[i].robj = reloc->robj; 668 track->cb[i].robj = reloc->robj;
669 track->cb[i].offset = idx_value; 669 track->cb[i].offset = idx_value;
670 track->cb_dirty = true;
670 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 671 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
671 break; 672 break;
672 case R300_ZB_DEPTHOFFSET: 673 case R300_ZB_DEPTHOFFSET:
@@ -679,6 +680,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
679 } 680 }
680 track->zb.robj = reloc->robj; 681 track->zb.robj = reloc->robj;
681 track->zb.offset = idx_value; 682 track->zb.offset = idx_value;
683 track->zb_dirty = true;
682 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 684 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
683 break; 685 break;
684 case R300_TX_OFFSET_0: 686 case R300_TX_OFFSET_0:
@@ -717,6 +719,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
717 tmp |= tile_flags; 719 tmp |= tile_flags;
718 ib[idx] = tmp; 720 ib[idx] = tmp;
719 track->textures[i].robj = reloc->robj; 721 track->textures[i].robj = reloc->robj;
722 track->tex_dirty = true;
720 break; 723 break;
721 /* Tracked registers */ 724 /* Tracked registers */
722 case 0x2084: 725 case 0x2084:
@@ -743,6 +746,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
743 if (p->rdev->family < CHIP_RV515) { 746 if (p->rdev->family < CHIP_RV515) {
744 track->maxy -= 1440; 747 track->maxy -= 1440;
745 } 748 }
749 track->cb_dirty = true;
750 track->zb_dirty = true;
746 break; 751 break;
747 case 0x4E00: 752 case 0x4E00:
748 /* RB3D_CCTL */ 753 /* RB3D_CCTL */
@@ -752,6 +757,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
752 return -EINVAL; 757 return -EINVAL;
753 } 758 }
754 track->num_cb = ((idx_value >> 5) & 0x3) + 1; 759 track->num_cb = ((idx_value >> 5) & 0x3) + 1;
760 track->cb_dirty = true;
755 break; 761 break;
756 case 0x4E38: 762 case 0x4E38:
757 case 0x4E3C: 763 case 0x4E3C:
@@ -814,6 +820,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
814 ((idx_value >> 21) & 0xF)); 820 ((idx_value >> 21) & 0xF));
815 return -EINVAL; 821 return -EINVAL;
816 } 822 }
823 track->cb_dirty = true;
817 break; 824 break;
818 case 0x4F00: 825 case 0x4F00:
819 /* ZB_CNTL */ 826 /* ZB_CNTL */
@@ -822,6 +829,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
822 } else { 829 } else {
823 track->z_enabled = false; 830 track->z_enabled = false;
824 } 831 }
832 track->zb_dirty = true;
825 break; 833 break;
826 case 0x4F10: 834 case 0x4F10:
827 /* ZB_FORMAT */ 835 /* ZB_FORMAT */
@@ -838,6 +846,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
838 (idx_value & 0xF)); 846 (idx_value & 0xF));
839 return -EINVAL; 847 return -EINVAL;
840 } 848 }
849 track->zb_dirty = true;
841 break; 850 break;
842 case 0x4F24: 851 case 0x4F24:
843 /* ZB_DEPTHPITCH */ 852 /* ZB_DEPTHPITCH */
@@ -861,6 +870,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
861 ib[idx] = tmp; 870 ib[idx] = tmp;
862 871
863 track->zb.pitch = idx_value & 0x3FFC; 872 track->zb.pitch = idx_value & 0x3FFC;
873 track->zb_dirty = true;
864 break; 874 break;
865 case 0x4104: 875 case 0x4104:
866 for (i = 0; i < 16; i++) { 876 for (i = 0; i < 16; i++) {
@@ -869,6 +879,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
869 enabled = !!(idx_value & (1 << i)); 879 enabled = !!(idx_value & (1 << i));
870 track->textures[i].enabled = enabled; 880 track->textures[i].enabled = enabled;
871 } 881 }
882 track->tex_dirty = true;
872 break; 883 break;
873 case 0x44C0: 884 case 0x44C0:
874 case 0x44C4: 885 case 0x44C4:
@@ -951,8 +962,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
951 DRM_ERROR("Invalid texture format %u\n", 962 DRM_ERROR("Invalid texture format %u\n",
952 (idx_value & 0x1F)); 963 (idx_value & 0x1F));
953 return -EINVAL; 964 return -EINVAL;
954 break;
955 } 965 }
966 track->tex_dirty = true;
956 break; 967 break;
957 case 0x4400: 968 case 0x4400:
958 case 0x4404: 969 case 0x4404:
@@ -980,6 +991,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
980 if (tmp == 2 || tmp == 4 || tmp == 6) { 991 if (tmp == 2 || tmp == 4 || tmp == 6) {
981 track->textures[i].roundup_h = false; 992 track->textures[i].roundup_h = false;
982 } 993 }
994 track->tex_dirty = true;
983 break; 995 break;
984 case 0x4500: 996 case 0x4500:
985 case 0x4504: 997 case 0x4504:
@@ -1017,6 +1029,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
1017 DRM_ERROR("Forbidden bit TXFORMAT_MSB\n"); 1029 DRM_ERROR("Forbidden bit TXFORMAT_MSB\n");
1018 return -EINVAL; 1030 return -EINVAL;
1019 } 1031 }
1032 track->tex_dirty = true;
1020 break; 1033 break;
1021 case 0x4480: 1034 case 0x4480:
1022 case 0x4484: 1035 case 0x4484:
@@ -1046,6 +1059,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
1046 track->textures[i].use_pitch = !!tmp; 1059 track->textures[i].use_pitch = !!tmp;
1047 tmp = (idx_value >> 22) & 0xF; 1060 tmp = (idx_value >> 22) & 0xF;
1048 track->textures[i].txdepth = tmp; 1061 track->textures[i].txdepth = tmp;
1062 track->tex_dirty = true;
1049 break; 1063 break;
1050 case R300_ZB_ZPASS_ADDR: 1064 case R300_ZB_ZPASS_ADDR:
1051 r = r100_cs_packet_next_reloc(p, &reloc); 1065 r = r100_cs_packet_next_reloc(p, &reloc);
@@ -1060,6 +1074,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
1060 case 0x4e0c: 1074 case 0x4e0c:
1061 /* RB3D_COLOR_CHANNEL_MASK */ 1075 /* RB3D_COLOR_CHANNEL_MASK */
1062 track->color_channel_mask = idx_value; 1076 track->color_channel_mask = idx_value;
1077 track->cb_dirty = true;
1063 break; 1078 break;
1064 case 0x43a4: 1079 case 0x43a4:
1065 /* SC_HYPERZ_EN */ 1080 /* SC_HYPERZ_EN */
@@ -1073,6 +1088,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
1073 case 0x4f1c: 1088 case 0x4f1c:
1074 /* ZB_BW_CNTL */ 1089 /* ZB_BW_CNTL */
1075 track->zb_cb_clear = !!(idx_value & (1 << 5)); 1090 track->zb_cb_clear = !!(idx_value & (1 << 5));
1091 track->cb_dirty = true;
1092 track->zb_dirty = true;
1076 if (p->rdev->hyperz_filp != p->filp) { 1093 if (p->rdev->hyperz_filp != p->filp) {
1077 if (idx_value & (R300_HIZ_ENABLE | 1094 if (idx_value & (R300_HIZ_ENABLE |
1078 R300_RD_COMP_ENABLE | 1095 R300_RD_COMP_ENABLE |
@@ -1084,6 +1101,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
1084 case 0x4e04: 1101 case 0x4e04:
1085 /* RB3D_BLENDCNTL */ 1102 /* RB3D_BLENDCNTL */
1086 track->blend_read_enable = !!(idx_value & (1 << 2)); 1103 track->blend_read_enable = !!(idx_value & (1 << 2));
1104 track->cb_dirty = true;
1087 break; 1105 break;
1088 case 0x4f28: /* ZB_DEPTHCLEARVALUE */ 1106 case 0x4f28: /* ZB_DEPTHCLEARVALUE */
1089 break; 1107 break;