aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon/r300.c
diff options
context:
space:
mode:
author Marek Olšák <maraeo@gmail.com> 2011-02-12 13:21:35 -0500
committer Dave Airlie <airlied@redhat.com> 2011-02-13 18:23:27 -0500
commit 40b4a7599d5555b408e594f4c8dae8015ccaae8f (patch)
tree 7241f131fe800f67b49b0d66241609b3e171270d /drivers/gpu/drm/radeon/r300.c
parent 01e2f533a234dc62d16c0d3d4fb9d71cf1ce50c3 (diff)
drm/radeon/kms: optimize CS state checking for r100->r500
The colorbuffer, zbuffer, and texture states are checked only once, when they change. This improves performance in apps that emit lots of draw packets and few state changes. This drops performance in glxgears by 1% or so, but glxgears is not a benchmark we care about. The time spent in the kernel when running Torcs dropped from 33% to 23% and the frame rate is higher, which is a good thing. r600 might need something like this as well. Signed-off-by: Marek Olšák <maraeo@gmail.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/r300.c')
-rw-r--r-- drivers/gpu/drm/radeon/r300.c 20
1 files changed, 19 insertions, 1 deletions
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 55fe5ba7def..15f94648f27 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -667,6 +667,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
667 } 667 }
668 track->cb[i].robj = reloc->robj; 668 track->cb[i].robj = reloc->robj;
669 track->cb[i].offset = idx_value; 669 track->cb[i].offset = idx_value;
670 track->cb_dirty = true;
670 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 671 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
671 break; 672 break;
672 case R300_ZB_DEPTHOFFSET: 673 case R300_ZB_DEPTHOFFSET:
@@ -679,6 +680,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
679 } 680 }
680 track->zb.robj = reloc->robj; 681 track->zb.robj = reloc->robj;
681 track->zb.offset = idx_value; 682 track->zb.offset = idx_value;
683 track->zb_dirty = true;
682 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 684 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
683 break; 685 break;
684 case R300_TX_OFFSET_0: 686 case R300_TX_OFFSET_0:
@@ -717,6 +719,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
717 tmp |= tile_flags; 719 tmp |= tile_flags;
718 ib[idx] = tmp; 720 ib[idx] = tmp;
719 track->textures[i].robj = reloc->robj; 721 track->textures[i].robj = reloc->robj;
722 track->tex_dirty = true;
720 break; 723 break;
721 /* Tracked registers */ 724 /* Tracked registers */
722 case 0x2084: 725 case 0x2084:
@@ -743,6 +746,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
743 if (p->rdev->family < CHIP_RV515) { 746 if (p->rdev->family < CHIP_RV515) {
744 track->maxy -= 1440; 747 track->maxy -= 1440;
745 } 748 }
749 track->cb_dirty = true;
750 track->zb_dirty = true;
746 break; 751 break;
747 case 0x4E00: 752 case 0x4E00:
748 /* RB3D_CCTL */ 753 /* RB3D_CCTL */
@@ -752,6 +757,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
752 return -EINVAL; 757 return -EINVAL;
753 } 758 }
754 track->num_cb = ((idx_value >> 5) & 0x3) + 1; 759 track->num_cb = ((idx_value >> 5) & 0x3) + 1;
760 track->cb_dirty = true;
755 break; 761 break;
756 case 0x4E38: 762 case 0x4E38:
757 case 0x4E3C: 763 case 0x4E3C:
@@ -814,6 +820,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
814 ((idx_value >> 21) & 0xF)); 820 ((idx_value >> 21) & 0xF));
815 return -EINVAL; 821 return -EINVAL;
816 } 822 }
823 track->cb_dirty = true;
817 break; 824 break;
818 case 0x4F00: 825 case 0x4F00:
819 /* ZB_CNTL */ 826 /* ZB_CNTL */
@@ -822,6 +829,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
822 } else { 829 } else {
823 track->z_enabled = false; 830 track->z_enabled = false;
824 } 831 }
832 track->zb_dirty = true;
825 break; 833 break;
826 case 0x4F10: 834 case 0x4F10:
827 /* ZB_FORMAT */ 835 /* ZB_FORMAT */
@@ -838,6 +846,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
838 (idx_value & 0xF)); 846 (idx_value & 0xF));
839 return -EINVAL; 847 return -EINVAL;
840 } 848 }
849 track->zb_dirty = true;
841 break; 850 break;
842 case 0x4F24: 851 case 0x4F24:
843 /* ZB_DEPTHPITCH */ 852 /* ZB_DEPTHPITCH */
@@ -861,6 +870,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
861 ib[idx] = tmp; 870 ib[idx] = tmp;
862 871
863 track->zb.pitch = idx_value & 0x3FFC; 872 track->zb.pitch = idx_value & 0x3FFC;
873 track->zb_dirty = true;
864 break; 874 break;
865 case 0x4104: 875 case 0x4104:
866 for (i = 0; i < 16; i++) { 876 for (i = 0; i < 16; i++) {
@@ -869,6 +879,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
869 enabled = !!(idx_value & (1 << i)); 879 enabled = !!(idx_value & (1 << i));
870 track->textures[i].enabled = enabled; 880 track->textures[i].enabled = enabled;
871 } 881 }
882 track->tex_dirty = true;
872 break; 883 break;
873 case 0x44C0: 884 case 0x44C0:
874 case 0x44C4: 885 case 0x44C4:
@@ -951,8 +962,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
951 DRM_ERROR("Invalid texture format %u\n", 962 DRM_ERROR("Invalid texture format %u\n",
952 (idx_value & 0x1F)); 963 (idx_value & 0x1F));
953 return -EINVAL; 964 return -EINVAL;
954 break;
955 } 965 }
966 track->tex_dirty = true;
956 break; 967 break;
957 case 0x4400: 968 case 0x4400:
958 case 0x4404: 969 case 0x4404:
@@ -980,6 +991,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
980 if (tmp == 2 || tmp == 4 || tmp == 6) { 991 if (tmp == 2 || tmp == 4 || tmp == 6) {
981 track->textures[i].roundup_h = false; 992 track->textures[i].roundup_h = false;
982 } 993 }
994 track->tex_dirty = true;
983 break; 995 break;
984 case 0x4500: 996 case 0x4500:
985 case 0x4504: 997 case 0x4504:
@@ -1017,6 +1029,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
1017 DRM_ERROR("Forbidden bit TXFORMAT_MSB\n"); 1029 DRM_ERROR("Forbidden bit TXFORMAT_MSB\n");
1018 return -EINVAL; 1030 return -EINVAL;
1019 } 1031 }
1032 track->tex_dirty = true;
1020 break; 1033 break;
1021 case 0x4480: 1034 case 0x4480:
1022 case 0x4484: 1035 case 0x4484:
@@ -1046,6 +1059,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
1046 track->textures[i].use_pitch = !!tmp; 1059 track->textures[i].use_pitch = !!tmp;
1047 tmp = (idx_value >> 22) & 0xF; 1060 tmp = (idx_value >> 22) & 0xF;
1048 track->textures[i].txdepth = tmp; 1061 track->textures[i].txdepth = tmp;
1062 track->tex_dirty = true;
1049 break; 1063 break;
1050 case R300_ZB_ZPASS_ADDR: 1064 case R300_ZB_ZPASS_ADDR:
1051 r = r100_cs_packet_next_reloc(p, &reloc); 1065 r = r100_cs_packet_next_reloc(p, &reloc);
@@ -1060,6 +1074,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
1060 case 0x4e0c: 1074 case 0x4e0c:
1061 /* RB3D_COLOR_CHANNEL_MASK */ 1075 /* RB3D_COLOR_CHANNEL_MASK */
1062 track->color_channel_mask = idx_value; 1076 track->color_channel_mask = idx_value;
1077 track->cb_dirty = true;
1063 break; 1078 break;
1064 case 0x43a4: 1079 case 0x43a4:
1065 /* SC_HYPERZ_EN */ 1080 /* SC_HYPERZ_EN */
@@ -1073,6 +1088,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
1073 case 0x4f1c: 1088 case 0x4f1c:
1074 /* ZB_BW_CNTL */ 1089 /* ZB_BW_CNTL */
1075 track->zb_cb_clear = !!(idx_value & (1 << 5)); 1090 track->zb_cb_clear = !!(idx_value & (1 << 5));
1091 track->cb_dirty = true;
1092 track->zb_dirty = true;
1076 if (p->rdev->hyperz_filp != p->filp) { 1093 if (p->rdev->hyperz_filp != p->filp) {
1077 if (idx_value & (R300_HIZ_ENABLE | 1094 if (idx_value & (R300_HIZ_ENABLE |
1078 R300_RD_COMP_ENABLE | 1095 R300_RD_COMP_ENABLE |
@@ -1084,6 +1101,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
1084 case 0x4e04: 1101 case 0x4e04:
1085 /* RB3D_BLENDCNTL */ 1102 /* RB3D_BLENDCNTL */
1086 track->blend_read_enable = !!(idx_value & (1 << 2)); 1103 track->blend_read_enable = !!(idx_value & (1 << 2));
1104 track->cb_dirty = true;
1087 break; 1105 break;
1088 case 0x4f28: /* ZB_DEPTHCLEARVALUE */ 1106 case 0x4f28: /* ZB_DEPTHCLEARVALUE */
1089 break; 1107 break;