aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon/r200.c
diff options
context:
space:
mode:
author Marek Olšák <maraeo@gmail.com> 2011-02-12 13:21:35 -0500
committer Dave Airlie <airlied@redhat.com> 2011-02-13 18:23:27 -0500
commit 40b4a7599d5555b408e594f4c8dae8015ccaae8f (patch)
tree 7241f131fe800f67b49b0d66241609b3e171270d /drivers/gpu/drm/radeon/r200.c
parent 01e2f533a234dc62d16c0d3d4fb9d71cf1ce50c3 (diff)
drm/radeon/kms: optimize CS state checking for r100->r500
The colorbuffer, zbuffer, and texture states are checked only once when they get changed. This improves performance in apps which emit lots of draw packets and few state changes. This drops performance in glxgears by 1% or so, but glxgears is not a benchmark we care about. The time spent in the kernel when running Torcs dropped from 33% to 23% and the frame rate is higher, which is a good thing. r600 might need something like this as well.
Signed-off-by: Marek Olšák <maraeo@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/r200.c')
-rw-r--r-- drivers/gpu/drm/radeon/r200.c 18
1 files changed, 18 insertions, 0 deletions
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c
index d2408c395619..f24058300413 100644
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -184,6 +184,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
184 } 184 }
185 track->zb.robj = reloc->robj; 185 track->zb.robj = reloc->robj;
186 track->zb.offset = idx_value; 186 track->zb.offset = idx_value;
187 track->zb_dirty = true;
187 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 188 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
188 break; 189 break;
189 case RADEON_RB3D_COLOROFFSET: 190 case RADEON_RB3D_COLOROFFSET:
@@ -196,6 +197,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
196 } 197 }
197 track->cb[0].robj = reloc->robj; 198 track->cb[0].robj = reloc->robj;
198 track->cb[0].offset = idx_value; 199 track->cb[0].offset = idx_value;
200 track->cb_dirty = true;
199 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 201 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
200 break; 202 break;
201 case R200_PP_TXOFFSET_0: 203 case R200_PP_TXOFFSET_0:
@@ -214,6 +216,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
214 } 216 }
215 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 217 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
216 track->textures[i].robj = reloc->robj; 218 track->textures[i].robj = reloc->robj;
219 track->tex_dirty = true;
217 break; 220 break;
218 case R200_PP_CUBIC_OFFSET_F1_0: 221 case R200_PP_CUBIC_OFFSET_F1_0:
219 case R200_PP_CUBIC_OFFSET_F2_0: 222 case R200_PP_CUBIC_OFFSET_F2_0:
@@ -257,9 +260,12 @@ int r200_packet0_check(struct radeon_cs_parser *p,
257 track->textures[i].cube_info[face - 1].offset = idx_value; 260 track->textures[i].cube_info[face - 1].offset = idx_value;
258 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 261 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
259 track->textures[i].cube_info[face - 1].robj = reloc->robj; 262 track->textures[i].cube_info[face - 1].robj = reloc->robj;
263 track->tex_dirty = true;
260 break; 264 break;
261 case RADEON_RE_WIDTH_HEIGHT: 265 case RADEON_RE_WIDTH_HEIGHT:
262 track->maxy = ((idx_value >> 16) & 0x7FF); 266 track->maxy = ((idx_value >> 16) & 0x7FF);
267 track->cb_dirty = true;
268 track->zb_dirty = true;
263 break; 269 break;
264 case RADEON_RB3D_COLORPITCH: 270 case RADEON_RB3D_COLORPITCH:
265 r = r100_cs_packet_next_reloc(p, &reloc); 271 r = r100_cs_packet_next_reloc(p, &reloc);
@@ -280,9 +286,11 @@ int r200_packet0_check(struct radeon_cs_parser *p,
280 ib[idx] = tmp; 286 ib[idx] = tmp;
281 287
282 track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK; 288 track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
289 track->cb_dirty = true;
283 break; 290 break;
284 case RADEON_RB3D_DEPTHPITCH: 291 case RADEON_RB3D_DEPTHPITCH:
285 track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK; 292 track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
293 track->zb_dirty = true;
286 break; 294 break;
287 case RADEON_RB3D_CNTL: 295 case RADEON_RB3D_CNTL:
288 switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { 296 switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
@@ -312,6 +320,8 @@ int r200_packet0_check(struct radeon_cs_parser *p,
312 } 320 }
313 321
314 track->z_enabled = !!(idx_value & RADEON_Z_ENABLE); 322 track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
323 track->cb_dirty = true;
324 track->zb_dirty = true;
315 break; 325 break;
316 case RADEON_RB3D_ZSTENCILCNTL: 326 case RADEON_RB3D_ZSTENCILCNTL:
317 switch (idx_value & 0xf) { 327 switch (idx_value & 0xf) {
@@ -329,6 +339,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
329 default: 339 default:
330 break; 340 break;
331 } 341 }
342 track->zb_dirty = true;
332 break; 343 break;
333 case RADEON_RB3D_ZPASS_ADDR: 344 case RADEON_RB3D_ZPASS_ADDR:
334 r = r100_cs_packet_next_reloc(p, &reloc); 345 r = r100_cs_packet_next_reloc(p, &reloc);
@@ -345,6 +356,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
345 uint32_t temp = idx_value >> 4; 356 uint32_t temp = idx_value >> 4;
346 for (i = 0; i < track->num_texture; i++) 357 for (i = 0; i < track->num_texture; i++)
347 track->textures[i].enabled = !!(temp & (1 << i)); 358 track->textures[i].enabled = !!(temp & (1 << i));
359 track->tex_dirty = true;
348 } 360 }
349 break; 361 break;
350 case RADEON_SE_VF_CNTL: 362 case RADEON_SE_VF_CNTL:
@@ -369,6 +381,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
369 i = (reg - R200_PP_TXSIZE_0) / 32; 381 i = (reg - R200_PP_TXSIZE_0) / 32;
370 track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1; 382 track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
371 track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; 383 track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
384 track->tex_dirty = true;
372 break; 385 break;
373 case R200_PP_TXPITCH_0: 386 case R200_PP_TXPITCH_0:
374 case R200_PP_TXPITCH_1: 387 case R200_PP_TXPITCH_1:
@@ -378,6 +391,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
378 case R200_PP_TXPITCH_5: 391 case R200_PP_TXPITCH_5:
379 i = (reg - R200_PP_TXPITCH_0) / 32; 392 i = (reg - R200_PP_TXPITCH_0) / 32;
380 track->textures[i].pitch = idx_value + 32; 393 track->textures[i].pitch = idx_value + 32;
394 track->tex_dirty = true;
381 break; 395 break;
382 case R200_PP_TXFILTER_0: 396 case R200_PP_TXFILTER_0:
383 case R200_PP_TXFILTER_1: 397 case R200_PP_TXFILTER_1:
@@ -394,6 +408,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
394 tmp = (idx_value >> 27) & 0x7; 408 tmp = (idx_value >> 27) & 0x7;
395 if (tmp == 2 || tmp == 6) 409 if (tmp == 2 || tmp == 6)
396 track->textures[i].roundup_h = false; 410 track->textures[i].roundup_h = false;
411 track->tex_dirty = true;
397 break; 412 break;
398 case R200_PP_TXMULTI_CTL_0: 413 case R200_PP_TXMULTI_CTL_0:
399 case R200_PP_TXMULTI_CTL_1: 414 case R200_PP_TXMULTI_CTL_1:
@@ -432,6 +447,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
432 track->textures[i].tex_coord_type = 1; 447 track->textures[i].tex_coord_type = 1;
433 break; 448 break;
434 } 449 }
450 track->tex_dirty = true;
435 break; 451 break;
436 case R200_PP_TXFORMAT_0: 452 case R200_PP_TXFORMAT_0:
437 case R200_PP_TXFORMAT_1: 453 case R200_PP_TXFORMAT_1:
@@ -488,6 +504,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
488 } 504 }
489 track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf); 505 track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
490 track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf); 506 track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
507 track->tex_dirty = true;
491 break; 508 break;
492 case R200_PP_CUBIC_FACES_0: 509 case R200_PP_CUBIC_FACES_0:
493 case R200_PP_CUBIC_FACES_1: 510 case R200_PP_CUBIC_FACES_1:
@@ -501,6 +518,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
501 track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf); 518 track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
502 track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf); 519 track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
503 } 520 }
521 track->tex_dirty = true;
504 break; 522 break;
505 default: 523 default:
506 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", 524 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",