diff options
author | Marek Olšák <maraeo@gmail.com> | 2011-02-12 13:21:35 -0500 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2011-02-13 18:23:27 -0500 |
commit | 40b4a7599d5555b408e594f4c8dae8015ccaae8f (patch) | |
tree | 7241f131fe800f67b49b0d66241609b3e171270d /drivers/gpu/drm/radeon/r300.c | |
parent | 01e2f533a234dc62d16c0d3d4fb9d71cf1ce50c3 (diff) |
drm/radeon/kms: optimize CS state checking for r100->r500
The colorbuffer, zbuffer, and texture states are checked only once when
they get changed. This improves performance in apps which emit
lots of draw packets and few state changes.
This drops performance in glxgears by 1% or so, but glxgears is not
a benchmark we care about.
The time spent in the kernel when running Torcs dropped from 33% to 23%
and the frame rate is higher, which is a good thing.
r600 might need something like this as well.
Signed-off-by: Marek Olšák <maraeo@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/r300.c')
-rw-r--r-- | drivers/gpu/drm/radeon/r300.c | 20 |
1 files changed, 19 insertions, 1 deletions
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 55fe5ba7def..15f94648f27 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c | |||
@@ -667,6 +667,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
667 | } | 667 | } |
668 | track->cb[i].robj = reloc->robj; | 668 | track->cb[i].robj = reloc->robj; |
669 | track->cb[i].offset = idx_value; | 669 | track->cb[i].offset = idx_value; |
670 | track->cb_dirty = true; | ||
670 | ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); | 671 | ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); |
671 | break; | 672 | break; |
672 | case R300_ZB_DEPTHOFFSET: | 673 | case R300_ZB_DEPTHOFFSET: |
@@ -679,6 +680,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
679 | } | 680 | } |
680 | track->zb.robj = reloc->robj; | 681 | track->zb.robj = reloc->robj; |
681 | track->zb.offset = idx_value; | 682 | track->zb.offset = idx_value; |
683 | track->zb_dirty = true; | ||
682 | ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); | 684 | ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); |
683 | break; | 685 | break; |
684 | case R300_TX_OFFSET_0: | 686 | case R300_TX_OFFSET_0: |
@@ -717,6 +719,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
717 | tmp |= tile_flags; | 719 | tmp |= tile_flags; |
718 | ib[idx] = tmp; | 720 | ib[idx] = tmp; |
719 | track->textures[i].robj = reloc->robj; | 721 | track->textures[i].robj = reloc->robj; |
722 | track->tex_dirty = true; | ||
720 | break; | 723 | break; |
721 | /* Tracked registers */ | 724 | /* Tracked registers */ |
722 | case 0x2084: | 725 | case 0x2084: |
@@ -743,6 +746,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
743 | if (p->rdev->family < CHIP_RV515) { | 746 | if (p->rdev->family < CHIP_RV515) { |
744 | track->maxy -= 1440; | 747 | track->maxy -= 1440; |
745 | } | 748 | } |
749 | track->cb_dirty = true; | ||
750 | track->zb_dirty = true; | ||
746 | break; | 751 | break; |
747 | case 0x4E00: | 752 | case 0x4E00: |
748 | /* RB3D_CCTL */ | 753 | /* RB3D_CCTL */ |
@@ -752,6 +757,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
752 | return -EINVAL; | 757 | return -EINVAL; |
753 | } | 758 | } |
754 | track->num_cb = ((idx_value >> 5) & 0x3) + 1; | 759 | track->num_cb = ((idx_value >> 5) & 0x3) + 1; |
760 | track->cb_dirty = true; | ||
755 | break; | 761 | break; |
756 | case 0x4E38: | 762 | case 0x4E38: |
757 | case 0x4E3C: | 763 | case 0x4E3C: |
@@ -814,6 +820,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
814 | ((idx_value >> 21) & 0xF)); | 820 | ((idx_value >> 21) & 0xF)); |
815 | return -EINVAL; | 821 | return -EINVAL; |
816 | } | 822 | } |
823 | track->cb_dirty = true; | ||
817 | break; | 824 | break; |
818 | case 0x4F00: | 825 | case 0x4F00: |
819 | /* ZB_CNTL */ | 826 | /* ZB_CNTL */ |
@@ -822,6 +829,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
822 | } else { | 829 | } else { |
823 | track->z_enabled = false; | 830 | track->z_enabled = false; |
824 | } | 831 | } |
832 | track->zb_dirty = true; | ||
825 | break; | 833 | break; |
826 | case 0x4F10: | 834 | case 0x4F10: |
827 | /* ZB_FORMAT */ | 835 | /* ZB_FORMAT */ |
@@ -838,6 +846,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
838 | (idx_value & 0xF)); | 846 | (idx_value & 0xF)); |
839 | return -EINVAL; | 847 | return -EINVAL; |
840 | } | 848 | } |
849 | track->zb_dirty = true; | ||
841 | break; | 850 | break; |
842 | case 0x4F24: | 851 | case 0x4F24: |
843 | /* ZB_DEPTHPITCH */ | 852 | /* ZB_DEPTHPITCH */ |
@@ -861,6 +870,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
861 | ib[idx] = tmp; | 870 | ib[idx] = tmp; |
862 | 871 | ||
863 | track->zb.pitch = idx_value & 0x3FFC; | 872 | track->zb.pitch = idx_value & 0x3FFC; |
873 | track->zb_dirty = true; | ||
864 | break; | 874 | break; |
865 | case 0x4104: | 875 | case 0x4104: |
866 | for (i = 0; i < 16; i++) { | 876 | for (i = 0; i < 16; i++) { |
@@ -869,6 +879,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
869 | enabled = !!(idx_value & (1 << i)); | 879 | enabled = !!(idx_value & (1 << i)); |
870 | track->textures[i].enabled = enabled; | 880 | track->textures[i].enabled = enabled; |
871 | } | 881 | } |
882 | track->tex_dirty = true; | ||
872 | break; | 883 | break; |
873 | case 0x44C0: | 884 | case 0x44C0: |
874 | case 0x44C4: | 885 | case 0x44C4: |
@@ -951,8 +962,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
951 | DRM_ERROR("Invalid texture format %u\n", | 962 | DRM_ERROR("Invalid texture format %u\n", |
952 | (idx_value & 0x1F)); | 963 | (idx_value & 0x1F)); |
953 | return -EINVAL; | 964 | return -EINVAL; |
954 | break; | ||
955 | } | 965 | } |
966 | track->tex_dirty = true; | ||
956 | break; | 967 | break; |
957 | case 0x4400: | 968 | case 0x4400: |
958 | case 0x4404: | 969 | case 0x4404: |
@@ -980,6 +991,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
980 | if (tmp == 2 || tmp == 4 || tmp == 6) { | 991 | if (tmp == 2 || tmp == 4 || tmp == 6) { |
981 | track->textures[i].roundup_h = false; | 992 | track->textures[i].roundup_h = false; |
982 | } | 993 | } |
994 | track->tex_dirty = true; | ||
983 | break; | 995 | break; |
984 | case 0x4500: | 996 | case 0x4500: |
985 | case 0x4504: | 997 | case 0x4504: |
@@ -1017,6 +1029,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
1017 | DRM_ERROR("Forbidden bit TXFORMAT_MSB\n"); | 1029 | DRM_ERROR("Forbidden bit TXFORMAT_MSB\n"); |
1018 | return -EINVAL; | 1030 | return -EINVAL; |
1019 | } | 1031 | } |
1032 | track->tex_dirty = true; | ||
1020 | break; | 1033 | break; |
1021 | case 0x4480: | 1034 | case 0x4480: |
1022 | case 0x4484: | 1035 | case 0x4484: |
@@ -1046,6 +1059,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
1046 | track->textures[i].use_pitch = !!tmp; | 1059 | track->textures[i].use_pitch = !!tmp; |
1047 | tmp = (idx_value >> 22) & 0xF; | 1060 | tmp = (idx_value >> 22) & 0xF; |
1048 | track->textures[i].txdepth = tmp; | 1061 | track->textures[i].txdepth = tmp; |
1062 | track->tex_dirty = true; | ||
1049 | break; | 1063 | break; |
1050 | case R300_ZB_ZPASS_ADDR: | 1064 | case R300_ZB_ZPASS_ADDR: |
1051 | r = r100_cs_packet_next_reloc(p, &reloc); | 1065 | r = r100_cs_packet_next_reloc(p, &reloc); |
@@ -1060,6 +1074,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
1060 | case 0x4e0c: | 1074 | case 0x4e0c: |
1061 | /* RB3D_COLOR_CHANNEL_MASK */ | 1075 | /* RB3D_COLOR_CHANNEL_MASK */ |
1062 | track->color_channel_mask = idx_value; | 1076 | track->color_channel_mask = idx_value; |
1077 | track->cb_dirty = true; | ||
1063 | break; | 1078 | break; |
1064 | case 0x43a4: | 1079 | case 0x43a4: |
1065 | /* SC_HYPERZ_EN */ | 1080 | /* SC_HYPERZ_EN */ |
@@ -1073,6 +1088,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
1073 | case 0x4f1c: | 1088 | case 0x4f1c: |
1074 | /* ZB_BW_CNTL */ | 1089 | /* ZB_BW_CNTL */ |
1075 | track->zb_cb_clear = !!(idx_value & (1 << 5)); | 1090 | track->zb_cb_clear = !!(idx_value & (1 << 5)); |
1091 | track->cb_dirty = true; | ||
1092 | track->zb_dirty = true; | ||
1076 | if (p->rdev->hyperz_filp != p->filp) { | 1093 | if (p->rdev->hyperz_filp != p->filp) { |
1077 | if (idx_value & (R300_HIZ_ENABLE | | 1094 | if (idx_value & (R300_HIZ_ENABLE | |
1078 | R300_RD_COMP_ENABLE | | 1095 | R300_RD_COMP_ENABLE | |
@@ -1084,6 +1101,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, | |||
1084 | case 0x4e04: | 1101 | case 0x4e04: |
1085 | /* RB3D_BLENDCNTL */ | 1102 | /* RB3D_BLENDCNTL */ |
1086 | track->blend_read_enable = !!(idx_value & (1 << 2)); | 1103 | track->blend_read_enable = !!(idx_value & (1 << 2)); |
1104 | track->cb_dirty = true; | ||
1087 | break; | 1105 | break; |
1088 | case 0x4f28: /* ZB_DEPTHCLEARVALUE */ | 1106 | case 0x4f28: /* ZB_DEPTHCLEARVALUE */ |
1089 | break; | 1107 | break; |