about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/radeon/r100.c
diff options
context:
space:
mode:
author Alex Deucher <alexander.deucher@amd.com> 2012-06-28 17:50:34 -0400
committer Dave Airlie <airlied@redhat.com> 2012-06-29 10:14:38 -0400
commit 0242f74d29df00ea97a6377e3c66f14efbb340d3 (patch)
tree 7d213c21198788210634e318bc916687a61a70f8 /drivers/gpu/drm/radeon/r100.c
parent 4391b2cf4b09954b6e8ff31bf63826115fc149f0 (diff)
drm/radeon: clean up CS functions in r100.c
Consolidate the CS functions to one section of the file. Previously they were spread all around.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/r100.c')
-rw-r--r-- drivers/gpu/drm/radeon/r100.c 959
1 files changed, 479 insertions, 480 deletions
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 35825bf1e790..3fa82e1b9428 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -103,112 +103,6 @@ void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
103 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 103 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
104 */ 104 */
105 105
106int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
107 struct radeon_cs_packet *pkt,
108 unsigned idx,
109 unsigned reg)
110{
111 int r;
112 u32 tile_flags = 0;
113 u32 tmp;
114 struct radeon_cs_reloc *reloc;
115 u32 value;
116
117 r = r100_cs_packet_next_reloc(p, &reloc);
118 if (r) {
119 DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
120 idx, reg);
121 r100_cs_dump_packet(p, pkt);
122 return r;
123 }
124
125 value = radeon_get_ib_value(p, idx);
126 tmp = value & 0x003fffff;
127 tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
128
129 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
130 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
131 tile_flags |= RADEON_DST_TILE_MACRO;
132 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
133 if (reg == RADEON_SRC_PITCH_OFFSET) {
134 DRM_ERROR("Cannot src blit from microtiled surface\n");
135 r100_cs_dump_packet(p, pkt);
136 return -EINVAL;
137 }
138 tile_flags |= RADEON_DST_TILE_MICRO;
139 }
140
141 tmp |= tile_flags;
142 p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
143 } else
144 p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
145 return 0;
146}
147
148int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
149 struct radeon_cs_packet *pkt,
150 int idx)
151{
152 unsigned c, i;
153 struct radeon_cs_reloc *reloc;
154 struct r100_cs_track *track;
155 int r = 0;
156 volatile uint32_t *ib;
157 u32 idx_value;
158
159 ib = p->ib.ptr;
160 track = (struct r100_cs_track *)p->track;
161 c = radeon_get_ib_value(p, idx++) & 0x1F;
162 if (c > 16) {
163 DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
164 pkt->opcode);
165 r100_cs_dump_packet(p, pkt);
166 return -EINVAL;
167 }
168 track->num_arrays = c;
169 for (i = 0; i < (c - 1); i+=2, idx+=3) {
170 r = r100_cs_packet_next_reloc(p, &reloc);
171 if (r) {
172 DRM_ERROR("No reloc for packet3 %d\n",
173 pkt->opcode);
174 r100_cs_dump_packet(p, pkt);
175 return r;
176 }
177 idx_value = radeon_get_ib_value(p, idx);
178 ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
179
180 track->arrays[i + 0].esize = idx_value >> 8;
181 track->arrays[i + 0].robj = reloc->robj;
182 track->arrays[i + 0].esize &= 0x7F;
183 r = r100_cs_packet_next_reloc(p, &reloc);
184 if (r) {
185 DRM_ERROR("No reloc for packet3 %d\n",
186 pkt->opcode);
187 r100_cs_dump_packet(p, pkt);
188 return r;
189 }
190 ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
191 track->arrays[i + 1].robj = reloc->robj;
192 track->arrays[i + 1].esize = idx_value >> 24;
193 track->arrays[i + 1].esize &= 0x7F;
194 }
195 if (c & 1) {
196 r = r100_cs_packet_next_reloc(p, &reloc);
197 if (r) {
198 DRM_ERROR("No reloc for packet3 %d\n",
199 pkt->opcode);
200 r100_cs_dump_packet(p, pkt);
201 return r;
202 }
203 idx_value = radeon_get_ib_value(p, idx);
204 ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
205 track->arrays[i + 0].robj = reloc->robj;
206 track->arrays[i + 0].esize = idx_value >> 8;
207 track->arrays[i + 0].esize &= 0x7F;
208 }
209 return r;
210}
211
212void r100_pre_page_flip(struct radeon_device *rdev, int crtc) 106void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
213{ 107{
214 /* enable the pflip int */ 108 /* enable the pflip int */
@@ -1206,6 +1100,112 @@ void r100_cp_disable(struct radeon_device *rdev)
1206/* 1100/*
1207 * CS functions 1101 * CS functions
1208 */ 1102 */
1103int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
1104 struct radeon_cs_packet *pkt,
1105 unsigned idx,
1106 unsigned reg)
1107{
1108 int r;
1109 u32 tile_flags = 0;
1110 u32 tmp;
1111 struct radeon_cs_reloc *reloc;
1112 u32 value;
1113
1114 r = r100_cs_packet_next_reloc(p, &reloc);
1115 if (r) {
1116 DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1117 idx, reg);
1118 r100_cs_dump_packet(p, pkt);
1119 return r;
1120 }
1121
1122 value = radeon_get_ib_value(p, idx);
1123 tmp = value & 0x003fffff;
1124 tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
1125
1126 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1127 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1128 tile_flags |= RADEON_DST_TILE_MACRO;
1129 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
1130 if (reg == RADEON_SRC_PITCH_OFFSET) {
1131 DRM_ERROR("Cannot src blit from microtiled surface\n");
1132 r100_cs_dump_packet(p, pkt);
1133 return -EINVAL;
1134 }
1135 tile_flags |= RADEON_DST_TILE_MICRO;
1136 }
1137
1138 tmp |= tile_flags;
1139 p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
1140 } else
1141 p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
1142 return 0;
1143}
1144
1145int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
1146 struct radeon_cs_packet *pkt,
1147 int idx)
1148{
1149 unsigned c, i;
1150 struct radeon_cs_reloc *reloc;
1151 struct r100_cs_track *track;
1152 int r = 0;
1153 volatile uint32_t *ib;
1154 u32 idx_value;
1155
1156 ib = p->ib.ptr;
1157 track = (struct r100_cs_track *)p->track;
1158 c = radeon_get_ib_value(p, idx++) & 0x1F;
1159 if (c > 16) {
1160 DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
1161 pkt->opcode);
1162 r100_cs_dump_packet(p, pkt);
1163 return -EINVAL;
1164 }
1165 track->num_arrays = c;
1166 for (i = 0; i < (c - 1); i+=2, idx+=3) {
1167 r = r100_cs_packet_next_reloc(p, &reloc);
1168 if (r) {
1169 DRM_ERROR("No reloc for packet3 %d\n",
1170 pkt->opcode);
1171 r100_cs_dump_packet(p, pkt);
1172 return r;
1173 }
1174 idx_value = radeon_get_ib_value(p, idx);
1175 ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
1176
1177 track->arrays[i + 0].esize = idx_value >> 8;
1178 track->arrays[i + 0].robj = reloc->robj;
1179 track->arrays[i + 0].esize &= 0x7F;
1180 r = r100_cs_packet_next_reloc(p, &reloc);
1181 if (r) {
1182 DRM_ERROR("No reloc for packet3 %d\n",
1183 pkt->opcode);
1184 r100_cs_dump_packet(p, pkt);
1185 return r;
1186 }
1187 ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
1188 track->arrays[i + 1].robj = reloc->robj;
1189 track->arrays[i + 1].esize = idx_value >> 24;
1190 track->arrays[i + 1].esize &= 0x7F;
1191 }
1192 if (c & 1) {
1193 r = r100_cs_packet_next_reloc(p, &reloc);
1194 if (r) {
1195 DRM_ERROR("No reloc for packet3 %d\n",
1196 pkt->opcode);
1197 r100_cs_dump_packet(p, pkt);
1198 return r;
1199 }
1200 idx_value = radeon_get_ib_value(p, idx);
1201 ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
1202 track->arrays[i + 0].robj = reloc->robj;
1203 track->arrays[i + 0].esize = idx_value >> 8;
1204 track->arrays[i + 0].esize &= 0x7F;
1205 }
1206 return r;
1207}
1208
1209int r100_cs_parse_packet0(struct radeon_cs_parser *p, 1209int r100_cs_parse_packet0(struct radeon_cs_parser *p,
1210 struct radeon_cs_packet *pkt, 1210 struct radeon_cs_packet *pkt,
1211 const unsigned *auth, unsigned n, 1211 const unsigned *auth, unsigned n,
@@ -2031,6 +2031,379 @@ int r100_cs_parse(struct radeon_cs_parser *p)
2031 return 0; 2031 return 0;
2032} 2032}
2033 2033
2034static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
2035{
2036 DRM_ERROR("pitch %d\n", t->pitch);
2037 DRM_ERROR("use_pitch %d\n", t->use_pitch);
2038 DRM_ERROR("width %d\n", t->width);
2039 DRM_ERROR("width_11 %d\n", t->width_11);
2040 DRM_ERROR("height %d\n", t->height);
2041 DRM_ERROR("height_11 %d\n", t->height_11);
2042 DRM_ERROR("num levels %d\n", t->num_levels);
2043 DRM_ERROR("depth %d\n", t->txdepth);
2044 DRM_ERROR("bpp %d\n", t->cpp);
2045 DRM_ERROR("coordinate type %d\n", t->tex_coord_type);
2046 DRM_ERROR("width round to power of 2 %d\n", t->roundup_w);
2047 DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
2048 DRM_ERROR("compress format %d\n", t->compress_format);
2049}
2050
2051static int r100_track_compress_size(int compress_format, int w, int h)
2052{
2053 int block_width, block_height, block_bytes;
2054 int wblocks, hblocks;
2055 int min_wblocks;
2056 int sz;
2057
2058 block_width = 4;
2059 block_height = 4;
2060
2061 switch (compress_format) {
2062 case R100_TRACK_COMP_DXT1:
2063 block_bytes = 8;
2064 min_wblocks = 4;
2065 break;
2066 default:
2067 case R100_TRACK_COMP_DXT35:
2068 block_bytes = 16;
2069 min_wblocks = 2;
2070 break;
2071 }
2072
2073 hblocks = (h + block_height - 1) / block_height;
2074 wblocks = (w + block_width - 1) / block_width;
2075 if (wblocks < min_wblocks)
2076 wblocks = min_wblocks;
2077 sz = wblocks * hblocks * block_bytes;
2078 return sz;
2079}
2080
2081static int r100_cs_track_cube(struct radeon_device *rdev,
2082 struct r100_cs_track *track, unsigned idx)
2083{
2084 unsigned face, w, h;
2085 struct radeon_bo *cube_robj;
2086 unsigned long size;
2087 unsigned compress_format = track->textures[idx].compress_format;
2088
2089 for (face = 0; face < 5; face++) {
2090 cube_robj = track->textures[idx].cube_info[face].robj;
2091 w = track->textures[idx].cube_info[face].width;
2092 h = track->textures[idx].cube_info[face].height;
2093
2094 if (compress_format) {
2095 size = r100_track_compress_size(compress_format, w, h);
2096 } else
2097 size = w * h;
2098 size *= track->textures[idx].cpp;
2099
2100 size += track->textures[idx].cube_info[face].offset;
2101
2102 if (size > radeon_bo_size(cube_robj)) {
2103 DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
2104 size, radeon_bo_size(cube_robj));
2105 r100_cs_track_texture_print(&track->textures[idx]);
2106 return -1;
2107 }
2108 }
2109 return 0;
2110}
2111
2112static int r100_cs_track_texture_check(struct radeon_device *rdev,
2113 struct r100_cs_track *track)
2114{
2115 struct radeon_bo *robj;
2116 unsigned long size;
2117 unsigned u, i, w, h, d;
2118 int ret;
2119
2120 for (u = 0; u < track->num_texture; u++) {
2121 if (!track->textures[u].enabled)
2122 continue;
2123 if (track->textures[u].lookup_disable)
2124 continue;
2125 robj = track->textures[u].robj;
2126 if (robj == NULL) {
2127 DRM_ERROR("No texture bound to unit %u\n", u);
2128 return -EINVAL;
2129 }
2130 size = 0;
2131 for (i = 0; i <= track->textures[u].num_levels; i++) {
2132 if (track->textures[u].use_pitch) {
2133 if (rdev->family < CHIP_R300)
2134 w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
2135 else
2136 w = track->textures[u].pitch / (1 << i);
2137 } else {
2138 w = track->textures[u].width;
2139 if (rdev->family >= CHIP_RV515)
2140 w |= track->textures[u].width_11;
2141 w = w / (1 << i);
2142 if (track->textures[u].roundup_w)
2143 w = roundup_pow_of_two(w);
2144 }
2145 h = track->textures[u].height;
2146 if (rdev->family >= CHIP_RV515)
2147 h |= track->textures[u].height_11;
2148 h = h / (1 << i);
2149 if (track->textures[u].roundup_h)
2150 h = roundup_pow_of_two(h);
2151 if (track->textures[u].tex_coord_type == 1) {
2152 d = (1 << track->textures[u].txdepth) / (1 << i);
2153 if (!d)
2154 d = 1;
2155 } else {
2156 d = 1;
2157 }
2158 if (track->textures[u].compress_format) {
2159
2160 size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
2161 /* compressed textures are block based */
2162 } else
2163 size += w * h * d;
2164 }
2165 size *= track->textures[u].cpp;
2166
2167 switch (track->textures[u].tex_coord_type) {
2168 case 0:
2169 case 1:
2170 break;
2171 case 2:
2172 if (track->separate_cube) {
2173 ret = r100_cs_track_cube(rdev, track, u);
2174 if (ret)
2175 return ret;
2176 } else
2177 size *= 6;
2178 break;
2179 default:
2180 DRM_ERROR("Invalid texture coordinate type %u for unit "
2181 "%u\n", track->textures[u].tex_coord_type, u);
2182 return -EINVAL;
2183 }
2184 if (size > radeon_bo_size(robj)) {
2185 DRM_ERROR("Texture of unit %u needs %lu bytes but is "
2186 "%lu\n", u, size, radeon_bo_size(robj));
2187 r100_cs_track_texture_print(&track->textures[u]);
2188 return -EINVAL;
2189 }
2190 }
2191 return 0;
2192}
2193
2194int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
2195{
2196 unsigned i;
2197 unsigned long size;
2198 unsigned prim_walk;
2199 unsigned nverts;
2200 unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
2201
2202 if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
2203 !track->blend_read_enable)
2204 num_cb = 0;
2205
2206 for (i = 0; i < num_cb; i++) {
2207 if (track->cb[i].robj == NULL) {
2208 DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
2209 return -EINVAL;
2210 }
2211 size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
2212 size += track->cb[i].offset;
2213 if (size > radeon_bo_size(track->cb[i].robj)) {
2214 DRM_ERROR("[drm] Buffer too small for color buffer %d "
2215 "(need %lu have %lu) !\n", i, size,
2216 radeon_bo_size(track->cb[i].robj));
2217 DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
2218 i, track->cb[i].pitch, track->cb[i].cpp,
2219 track->cb[i].offset, track->maxy);
2220 return -EINVAL;
2221 }
2222 }
2223 track->cb_dirty = false;
2224
2225 if (track->zb_dirty && track->z_enabled) {
2226 if (track->zb.robj == NULL) {
2227 DRM_ERROR("[drm] No buffer for z buffer !\n");
2228 return -EINVAL;
2229 }
2230 size = track->zb.pitch * track->zb.cpp * track->maxy;
2231 size += track->zb.offset;
2232 if (size > radeon_bo_size(track->zb.robj)) {
2233 DRM_ERROR("[drm] Buffer too small for z buffer "
2234 "(need %lu have %lu) !\n", size,
2235 radeon_bo_size(track->zb.robj));
2236 DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
2237 track->zb.pitch, track->zb.cpp,
2238 track->zb.offset, track->maxy);
2239 return -EINVAL;
2240 }
2241 }
2242 track->zb_dirty = false;
2243
2244 if (track->aa_dirty && track->aaresolve) {
2245 if (track->aa.robj == NULL) {
2246 DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
2247 return -EINVAL;
2248 }
2249 /* I believe the format comes from colorbuffer0. */
2250 size = track->aa.pitch * track->cb[0].cpp * track->maxy;
2251 size += track->aa.offset;
2252 if (size > radeon_bo_size(track->aa.robj)) {
2253 DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
2254 "(need %lu have %lu) !\n", i, size,
2255 radeon_bo_size(track->aa.robj));
2256 DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
2257 i, track->aa.pitch, track->cb[0].cpp,
2258 track->aa.offset, track->maxy);
2259 return -EINVAL;
2260 }
2261 }
2262 track->aa_dirty = false;
2263
2264 prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
2265 if (track->vap_vf_cntl & (1 << 14)) {
2266 nverts = track->vap_alt_nverts;
2267 } else {
2268 nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
2269 }
2270 switch (prim_walk) {
2271 case 1:
2272 for (i = 0; i < track->num_arrays; i++) {
2273 size = track->arrays[i].esize * track->max_indx * 4;
2274 if (track->arrays[i].robj == NULL) {
2275 DRM_ERROR("(PW %u) Vertex array %u no buffer "
2276 "bound\n", prim_walk, i);
2277 return -EINVAL;
2278 }
2279 if (size > radeon_bo_size(track->arrays[i].robj)) {
2280 dev_err(rdev->dev, "(PW %u) Vertex array %u "
2281 "need %lu dwords have %lu dwords\n",
2282 prim_walk, i, size >> 2,
2283 radeon_bo_size(track->arrays[i].robj)
2284 >> 2);
2285 DRM_ERROR("Max indices %u\n", track->max_indx);
2286 return -EINVAL;
2287 }
2288 }
2289 break;
2290 case 2:
2291 for (i = 0; i < track->num_arrays; i++) {
2292 size = track->arrays[i].esize * (nverts - 1) * 4;
2293 if (track->arrays[i].robj == NULL) {
2294 DRM_ERROR("(PW %u) Vertex array %u no buffer "
2295 "bound\n", prim_walk, i);
2296 return -EINVAL;
2297 }
2298 if (size > radeon_bo_size(track->arrays[i].robj)) {
2299 dev_err(rdev->dev, "(PW %u) Vertex array %u "
2300 "need %lu dwords have %lu dwords\n",
2301 prim_walk, i, size >> 2,
2302 radeon_bo_size(track->arrays[i].robj)
2303 >> 2);
2304 return -EINVAL;
2305 }
2306 }
2307 break;
2308 case 3:
2309 size = track->vtx_size * nverts;
2310 if (size != track->immd_dwords) {
2311 DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
2312 track->immd_dwords, size);
2313 DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
2314 nverts, track->vtx_size);
2315 return -EINVAL;
2316 }
2317 break;
2318 default:
2319 DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
2320 prim_walk);
2321 return -EINVAL;
2322 }
2323
2324 if (track->tex_dirty) {
2325 track->tex_dirty = false;
2326 return r100_cs_track_texture_check(rdev, track);
2327 }
2328 return 0;
2329}
2330
2331void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
2332{
2333 unsigned i, face;
2334
2335 track->cb_dirty = true;
2336 track->zb_dirty = true;
2337 track->tex_dirty = true;
2338 track->aa_dirty = true;
2339
2340 if (rdev->family < CHIP_R300) {
2341 track->num_cb = 1;
2342 if (rdev->family <= CHIP_RS200)
2343 track->num_texture = 3;
2344 else
2345 track->num_texture = 6;
2346 track->maxy = 2048;
2347 track->separate_cube = 1;
2348 } else {
2349 track->num_cb = 4;
2350 track->num_texture = 16;
2351 track->maxy = 4096;
2352 track->separate_cube = 0;
2353 track->aaresolve = false;
2354 track->aa.robj = NULL;
2355 }
2356
2357 for (i = 0; i < track->num_cb; i++) {
2358 track->cb[i].robj = NULL;
2359 track->cb[i].pitch = 8192;
2360 track->cb[i].cpp = 16;
2361 track->cb[i].offset = 0;
2362 }
2363 track->z_enabled = true;
2364 track->zb.robj = NULL;
2365 track->zb.pitch = 8192;
2366 track->zb.cpp = 4;
2367 track->zb.offset = 0;
2368 track->vtx_size = 0x7F;
2369 track->immd_dwords = 0xFFFFFFFFUL;
2370 track->num_arrays = 11;
2371 track->max_indx = 0x00FFFFFFUL;
2372 for (i = 0; i < track->num_arrays; i++) {
2373 track->arrays[i].robj = NULL;
2374 track->arrays[i].esize = 0x7F;
2375 }
2376 for (i = 0; i < track->num_texture; i++) {
2377 track->textures[i].compress_format = R100_TRACK_COMP_NONE;
2378 track->textures[i].pitch = 16536;
2379 track->textures[i].width = 16536;
2380 track->textures[i].height = 16536;
2381 track->textures[i].width_11 = 1 << 11;
2382 track->textures[i].height_11 = 1 << 11;
2383 track->textures[i].num_levels = 12;
2384 if (rdev->family <= CHIP_RS200) {
2385 track->textures[i].tex_coord_type = 0;
2386 track->textures[i].txdepth = 0;
2387 } else {
2388 track->textures[i].txdepth = 16;
2389 track->textures[i].tex_coord_type = 1;
2390 }
2391 track->textures[i].cpp = 64;
2392 track->textures[i].robj = NULL;
2393 /* CS IB emission code makes sure texture unit are disabled */
2394 track->textures[i].enabled = false;
2395 track->textures[i].lookup_disable = false;
2396 track->textures[i].roundup_w = true;
2397 track->textures[i].roundup_h = true;
2398 if (track->separate_cube)
2399 for (face = 0; face < 5; face++) {
2400 track->textures[i].cube_info[face].robj = NULL;
2401 track->textures[i].cube_info[face].width = 16536;
2402 track->textures[i].cube_info[face].height = 16536;
2403 track->textures[i].cube_info[face].offset = 0;
2404 }
2405 }
2406}
2034 2407
2035/* 2408/*
2036 * Global GPU functions 2409 * Global GPU functions
@@ -3244,380 +3617,6 @@ void r100_bandwidth_update(struct radeon_device *rdev)
3244 } 3617 }
3245} 3618}
3246 3619
3247static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
3248{
3249 DRM_ERROR("pitch %d\n", t->pitch);
3250 DRM_ERROR("use_pitch %d\n", t->use_pitch);
3251 DRM_ERROR("width %d\n", t->width);
3252 DRM_ERROR("width_11 %d\n", t->width_11);
3253 DRM_ERROR("height %d\n", t->height);
3254 DRM_ERROR("height_11 %d\n", t->height_11);
3255 DRM_ERROR("num levels %d\n", t->num_levels);
3256 DRM_ERROR("depth %d\n", t->txdepth);
3257 DRM_ERROR("bpp %d\n", t->cpp);
3258 DRM_ERROR("coordinate type %d\n", t->tex_coord_type);
3259 DRM_ERROR("width round to power of 2 %d\n", t->roundup_w);
3260 DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
3261 DRM_ERROR("compress format %d\n", t->compress_format);
3262}
3263
3264static int r100_track_compress_size(int compress_format, int w, int h)
3265{
3266 int block_width, block_height, block_bytes;
3267 int wblocks, hblocks;
3268 int min_wblocks;
3269 int sz;
3270
3271 block_width = 4;
3272 block_height = 4;
3273
3274 switch (compress_format) {
3275 case R100_TRACK_COMP_DXT1:
3276 block_bytes = 8;
3277 min_wblocks = 4;
3278 break;
3279 default:
3280 case R100_TRACK_COMP_DXT35:
3281 block_bytes = 16;
3282 min_wblocks = 2;
3283 break;
3284 }
3285
3286 hblocks = (h + block_height - 1) / block_height;
3287 wblocks = (w + block_width - 1) / block_width;
3288 if (wblocks < min_wblocks)
3289 wblocks = min_wblocks;
3290 sz = wblocks * hblocks * block_bytes;
3291 return sz;
3292}
3293
3294static int r100_cs_track_cube(struct radeon_device *rdev,
3295 struct r100_cs_track *track, unsigned idx)
3296{
3297 unsigned face, w, h;
3298 struct radeon_bo *cube_robj;
3299 unsigned long size;
3300 unsigned compress_format = track->textures[idx].compress_format;
3301
3302 for (face = 0; face < 5; face++) {
3303 cube_robj = track->textures[idx].cube_info[face].robj;
3304 w = track->textures[idx].cube_info[face].width;
3305 h = track->textures[idx].cube_info[face].height;
3306
3307 if (compress_format) {
3308 size = r100_track_compress_size(compress_format, w, h);
3309 } else
3310 size = w * h;
3311 size *= track->textures[idx].cpp;
3312
3313 size += track->textures[idx].cube_info[face].offset;
3314
3315 if (size > radeon_bo_size(cube_robj)) {
3316 DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
3317 size, radeon_bo_size(cube_robj));
3318 r100_cs_track_texture_print(&track->textures[idx]);
3319 return -1;
3320 }
3321 }
3322 return 0;
3323}
3324
3325static int r100_cs_track_texture_check(struct radeon_device *rdev,
3326 struct r100_cs_track *track)
3327{
3328 struct radeon_bo *robj;
3329 unsigned long size;
3330 unsigned u, i, w, h, d;
3331 int ret;
3332
3333 for (u = 0; u < track->num_texture; u++) {
3334 if (!track->textures[u].enabled)
3335 continue;
3336 if (track->textures[u].lookup_disable)
3337 continue;
3338 robj = track->textures[u].robj;
3339 if (robj == NULL) {
3340 DRM_ERROR("No texture bound to unit %u\n", u);
3341 return -EINVAL;
3342 }
3343 size = 0;
3344 for (i = 0; i <= track->textures[u].num_levels; i++) {
3345 if (track->textures[u].use_pitch) {
3346 if (rdev->family < CHIP_R300)
3347 w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
3348 else
3349 w = track->textures[u].pitch / (1 << i);
3350 } else {
3351 w = track->textures[u].width;
3352 if (rdev->family >= CHIP_RV515)
3353 w |= track->textures[u].width_11;
3354 w = w / (1 << i);
3355 if (track->textures[u].roundup_w)
3356 w = roundup_pow_of_two(w);
3357 }
3358 h = track->textures[u].height;
3359 if (rdev->family >= CHIP_RV515)
3360 h |= track->textures[u].height_11;
3361 h = h / (1 << i);
3362 if (track->textures[u].roundup_h)
3363 h = roundup_pow_of_two(h);
3364 if (track->textures[u].tex_coord_type == 1) {
3365 d = (1 << track->textures[u].txdepth) / (1 << i);
3366 if (!d)
3367 d = 1;
3368 } else {
3369 d = 1;
3370 }
3371 if (track->textures[u].compress_format) {
3372
3373 size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
3374 /* compressed textures are block based */
3375 } else
3376 size += w * h * d;
3377 }
3378 size *= track->textures[u].cpp;
3379
3380 switch (track->textures[u].tex_coord_type) {
3381 case 0:
3382 case 1:
3383 break;
3384 case 2:
3385 if (track->separate_cube) {
3386 ret = r100_cs_track_cube(rdev, track, u);
3387 if (ret)
3388 return ret;
3389 } else
3390 size *= 6;
3391 break;
3392 default:
3393 DRM_ERROR("Invalid texture coordinate type %u for unit "
3394 "%u\n", track->textures[u].tex_coord_type, u);
3395 return -EINVAL;
3396 }
3397 if (size > radeon_bo_size(robj)) {
3398 DRM_ERROR("Texture of unit %u needs %lu bytes but is "
3399 "%lu\n", u, size, radeon_bo_size(robj));
3400 r100_cs_track_texture_print(&track->textures[u]);
3401 return -EINVAL;
3402 }
3403 }
3404 return 0;
3405}
3406
3407int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
3408{
3409 unsigned i;
3410 unsigned long size;
3411 unsigned prim_walk;
3412 unsigned nverts;
3413 unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
3414
3415 if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
3416 !track->blend_read_enable)
3417 num_cb = 0;
3418
3419 for (i = 0; i < num_cb; i++) {
3420 if (track->cb[i].robj == NULL) {
3421 DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
3422 return -EINVAL;
3423 }
3424 size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
3425 size += track->cb[i].offset;
3426 if (size > radeon_bo_size(track->cb[i].robj)) {
3427 DRM_ERROR("[drm] Buffer too small for color buffer %d "
3428 "(need %lu have %lu) !\n", i, size,
3429 radeon_bo_size(track->cb[i].robj));
3430 DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
3431 i, track->cb[i].pitch, track->cb[i].cpp,
3432 track->cb[i].offset, track->maxy);
3433 return -EINVAL;
3434 }
3435 }
3436 track->cb_dirty = false;
3437
3438 if (track->zb_dirty && track->z_enabled) {
3439 if (track->zb.robj == NULL) {
3440 DRM_ERROR("[drm] No buffer for z buffer !\n");
3441 return -EINVAL;
3442 }
3443 size = track->zb.pitch * track->zb.cpp * track->maxy;
3444 size += track->zb.offset;
3445 if (size > radeon_bo_size(track->zb.robj)) {
3446 DRM_ERROR("[drm] Buffer too small for z buffer "
3447 "(need %lu have %lu) !\n", size,
3448 radeon_bo_size(track->zb.robj));
3449 DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
3450 track->zb.pitch, track->zb.cpp,
3451 track->zb.offset, track->maxy);
3452 return -EINVAL;
3453 }
3454 }
3455 track->zb_dirty = false;
3456
3457 if (track->aa_dirty && track->aaresolve) {
3458 if (track->aa.robj == NULL) {
3459 DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
3460 return -EINVAL;
3461 }
3462 /* I believe the format comes from colorbuffer0. */
3463 size = track->aa.pitch * track->cb[0].cpp * track->maxy;
3464 size += track->aa.offset;
3465 if (size > radeon_bo_size(track->aa.robj)) {
3466 DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
3467 "(need %lu have %lu) !\n", i, size,
3468 radeon_bo_size(track->aa.robj));
3469 DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
3470 i, track->aa.pitch, track->cb[0].cpp,
3471 track->aa.offset, track->maxy);
3472 return -EINVAL;
3473 }
3474 }
3475 track->aa_dirty = false;
3476
3477 prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
3478 if (track->vap_vf_cntl & (1 << 14)) {
3479 nverts = track->vap_alt_nverts;
3480 } else {
3481 nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
3482 }
3483 switch (prim_walk) {
3484 case 1:
3485 for (i = 0; i < track->num_arrays; i++) {
3486 size = track->arrays[i].esize * track->max_indx * 4;
3487 if (track->arrays[i].robj == NULL) {
3488 DRM_ERROR("(PW %u) Vertex array %u no buffer "
3489 "bound\n", prim_walk, i);
3490 return -EINVAL;
3491 }
3492 if (size > radeon_bo_size(track->arrays[i].robj)) {
3493 dev_err(rdev->dev, "(PW %u) Vertex array %u "
3494 "need %lu dwords have %lu dwords\n",
3495 prim_walk, i, size >> 2,
3496 radeon_bo_size(track->arrays[i].robj)
3497 >> 2);
3498 DRM_ERROR("Max indices %u\n", track->max_indx);
3499 return -EINVAL;
3500 }
3501 }
3502 break;
3503 case 2:
3504 for (i = 0; i < track->num_arrays; i++) {
3505 size = track->arrays[i].esize * (nverts - 1) * 4;
3506 if (track->arrays[i].robj == NULL) {
3507 DRM_ERROR("(PW %u) Vertex array %u no buffer "
3508 "bound\n", prim_walk, i);
3509 return -EINVAL;
3510 }
3511 if (size > radeon_bo_size(track->arrays[i].robj)) {
3512 dev_err(rdev->dev, "(PW %u) Vertex array %u "
3513 "need %lu dwords have %lu dwords\n",
3514 prim_walk, i, size >> 2,
3515 radeon_bo_size(track->arrays[i].robj)
3516 >> 2);
3517 return -EINVAL;
3518 }
3519 }
3520 break;
3521 case 3:
3522 size = track->vtx_size * nverts;
3523 if (size != track->immd_dwords) {
3524 DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
3525 track->immd_dwords, size);
3526 DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
3527 nverts, track->vtx_size);
3528 return -EINVAL;
3529 }
3530 break;
3531 default:
3532 DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
3533 prim_walk);
3534 return -EINVAL;
3535 }
3536
3537 if (track->tex_dirty) {
3538 track->tex_dirty = false;
3539 return r100_cs_track_texture_check(rdev, track);
3540 }
3541 return 0;
3542}
3543
3544void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
3545{
3546 unsigned i, face;
3547
3548 track->cb_dirty = true;
3549 track->zb_dirty = true;
3550 track->tex_dirty = true;
3551 track->aa_dirty = true;
3552
3553 if (rdev->family < CHIP_R300) {
3554 track->num_cb = 1;
3555 if (rdev->family <= CHIP_RS200)
3556 track->num_texture = 3;
3557 else
3558 track->num_texture = 6;
3559 track->maxy = 2048;
3560 track->separate_cube = 1;
3561 } else {
3562 track->num_cb = 4;
3563 track->num_texture = 16;
3564 track->maxy = 4096;
3565 track->separate_cube = 0;
3566 track->aaresolve = false;
3567 track->aa.robj = NULL;
3568 }
3569
3570 for (i = 0; i < track->num_cb; i++) {
3571 track->cb[i].robj = NULL;
3572 track->cb[i].pitch = 8192;
3573 track->cb[i].cpp = 16;
3574 track->cb[i].offset = 0;
3575 }
3576 track->z_enabled = true;
3577 track->zb.robj = NULL;
3578 track->zb.pitch = 8192;
3579 track->zb.cpp = 4;
3580 track->zb.offset = 0;
3581 track->vtx_size = 0x7F;
3582 track->immd_dwords = 0xFFFFFFFFUL;
3583 track->num_arrays = 11;
3584 track->max_indx = 0x00FFFFFFUL;
3585 for (i = 0; i < track->num_arrays; i++) {
3586 track->arrays[i].robj = NULL;
3587 track->arrays[i].esize = 0x7F;
3588 }
3589 for (i = 0; i < track->num_texture; i++) {
3590 track->textures[i].compress_format = R100_TRACK_COMP_NONE;
3591 track->textures[i].pitch = 16536;
3592 track->textures[i].width = 16536;
3593 track->textures[i].height = 16536;
3594 track->textures[i].width_11 = 1 << 11;
3595 track->textures[i].height_11 = 1 << 11;
3596 track->textures[i].num_levels = 12;
3597 if (rdev->family <= CHIP_RS200) {
3598 track->textures[i].tex_coord_type = 0;
3599 track->textures[i].txdepth = 0;
3600 } else {
3601 track->textures[i].txdepth = 16;
3602 track->textures[i].tex_coord_type = 1;
3603 }
3604 track->textures[i].cpp = 64;
3605 track->textures[i].robj = NULL;
3606 /* CS IB emission code makes sure texture unit are disabled */
3607 track->textures[i].enabled = false;
3608 track->textures[i].lookup_disable = false;
3609 track->textures[i].roundup_w = true;
3610 track->textures[i].roundup_h = true;
3611 if (track->separate_cube)
3612 for (face = 0; face < 5; face++) {
3613 track->textures[i].cube_info[face].robj = NULL;
3614 track->textures[i].cube_info[face].width = 16536;
3615 track->textures[i].cube_info[face].height = 16536;
3616 track->textures[i].cube_info[face].offset = 0;
3617 }
3618 }
3619}
3620
3621int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) 3620int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3622{ 3621{
3623 uint32_t scratch; 3622 uint32_t scratch;