diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/r600_cs.c')
-rw-r--r-- | drivers/gpu/drm/radeon/r600_cs.c | 747 |
1 files changed, 505 insertions, 242 deletions
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 250a3a918193..909bda8dd550 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c | |||
@@ -50,6 +50,7 @@ struct r600_cs_track { | |||
50 | u32 nsamples; | 50 | u32 nsamples; |
51 | u32 cb_color_base_last[8]; | 51 | u32 cb_color_base_last[8]; |
52 | struct radeon_bo *cb_color_bo[8]; | 52 | struct radeon_bo *cb_color_bo[8]; |
53 | u64 cb_color_bo_mc[8]; | ||
53 | u32 cb_color_bo_offset[8]; | 54 | u32 cb_color_bo_offset[8]; |
54 | struct radeon_bo *cb_color_frag_bo[8]; | 55 | struct radeon_bo *cb_color_frag_bo[8]; |
55 | struct radeon_bo *cb_color_tile_bo[8]; | 56 | struct radeon_bo *cb_color_tile_bo[8]; |
@@ -67,76 +68,239 @@ struct r600_cs_track { | |||
67 | u32 db_depth_size; | 68 | u32 db_depth_size; |
68 | u32 db_offset; | 69 | u32 db_offset; |
69 | struct radeon_bo *db_bo; | 70 | struct radeon_bo *db_bo; |
71 | u64 db_bo_mc; | ||
70 | }; | 72 | }; |
71 | 73 | ||
74 | #define FMT_8_BIT(fmt, vc) [fmt] = { 1, 1, 1, vc, CHIP_R600 } | ||
75 | #define FMT_16_BIT(fmt, vc) [fmt] = { 1, 1, 2, vc, CHIP_R600 } | ||
76 | #define FMT_24_BIT(fmt) [fmt] = { 1, 1, 3, 0, CHIP_R600 } | ||
77 | #define FMT_32_BIT(fmt, vc) [fmt] = { 1, 1, 4, vc, CHIP_R600 } | ||
78 | #define FMT_48_BIT(fmt) [fmt] = { 1, 1, 6, 0, CHIP_R600 } | ||
79 | #define FMT_64_BIT(fmt, vc) [fmt] = { 1, 1, 8, vc, CHIP_R600 } | ||
80 | #define FMT_96_BIT(fmt) [fmt] = { 1, 1, 12, 0, CHIP_R600 } | ||
81 | #define FMT_128_BIT(fmt, vc) [fmt] = { 1, 1, 16,vc, CHIP_R600 } | ||
82 | |||
83 | struct gpu_formats { | ||
84 | unsigned blockwidth; | ||
85 | unsigned blockheight; | ||
86 | unsigned blocksize; | ||
87 | unsigned valid_color; | ||
88 | enum radeon_family min_family; | ||
89 | }; | ||
90 | |||
91 | static const struct gpu_formats color_formats_table[] = { | ||
92 | /* 8 bit */ | ||
93 | FMT_8_BIT(V_038004_COLOR_8, 1), | ||
94 | FMT_8_BIT(V_038004_COLOR_4_4, 1), | ||
95 | FMT_8_BIT(V_038004_COLOR_3_3_2, 1), | ||
96 | FMT_8_BIT(V_038004_FMT_1, 0), | ||
97 | |||
98 | /* 16-bit */ | ||
99 | FMT_16_BIT(V_038004_COLOR_16, 1), | ||
100 | FMT_16_BIT(V_038004_COLOR_16_FLOAT, 1), | ||
101 | FMT_16_BIT(V_038004_COLOR_8_8, 1), | ||
102 | FMT_16_BIT(V_038004_COLOR_5_6_5, 1), | ||
103 | FMT_16_BIT(V_038004_COLOR_6_5_5, 1), | ||
104 | FMT_16_BIT(V_038004_COLOR_1_5_5_5, 1), | ||
105 | FMT_16_BIT(V_038004_COLOR_4_4_4_4, 1), | ||
106 | FMT_16_BIT(V_038004_COLOR_5_5_5_1, 1), | ||
107 | |||
108 | /* 24-bit */ | ||
109 | FMT_24_BIT(V_038004_FMT_8_8_8), | ||
110 | |||
111 | /* 32-bit */ | ||
112 | FMT_32_BIT(V_038004_COLOR_32, 1), | ||
113 | FMT_32_BIT(V_038004_COLOR_32_FLOAT, 1), | ||
114 | FMT_32_BIT(V_038004_COLOR_16_16, 1), | ||
115 | FMT_32_BIT(V_038004_COLOR_16_16_FLOAT, 1), | ||
116 | FMT_32_BIT(V_038004_COLOR_8_24, 1), | ||
117 | FMT_32_BIT(V_038004_COLOR_8_24_FLOAT, 1), | ||
118 | FMT_32_BIT(V_038004_COLOR_24_8, 1), | ||
119 | FMT_32_BIT(V_038004_COLOR_24_8_FLOAT, 1), | ||
120 | FMT_32_BIT(V_038004_COLOR_10_11_11, 1), | ||
121 | FMT_32_BIT(V_038004_COLOR_10_11_11_FLOAT, 1), | ||
122 | FMT_32_BIT(V_038004_COLOR_11_11_10, 1), | ||
123 | FMT_32_BIT(V_038004_COLOR_11_11_10_FLOAT, 1), | ||
124 | FMT_32_BIT(V_038004_COLOR_2_10_10_10, 1), | ||
125 | FMT_32_BIT(V_038004_COLOR_8_8_8_8, 1), | ||
126 | FMT_32_BIT(V_038004_COLOR_10_10_10_2, 1), | ||
127 | FMT_32_BIT(V_038004_FMT_5_9_9_9_SHAREDEXP, 0), | ||
128 | FMT_32_BIT(V_038004_FMT_32_AS_8, 0), | ||
129 | FMT_32_BIT(V_038004_FMT_32_AS_8_8, 0), | ||
130 | |||
131 | /* 48-bit */ | ||
132 | FMT_48_BIT(V_038004_FMT_16_16_16), | ||
133 | FMT_48_BIT(V_038004_FMT_16_16_16_FLOAT), | ||
134 | |||
135 | /* 64-bit */ | ||
136 | FMT_64_BIT(V_038004_COLOR_X24_8_32_FLOAT, 1), | ||
137 | FMT_64_BIT(V_038004_COLOR_32_32, 1), | ||
138 | FMT_64_BIT(V_038004_COLOR_32_32_FLOAT, 1), | ||
139 | FMT_64_BIT(V_038004_COLOR_16_16_16_16, 1), | ||
140 | FMT_64_BIT(V_038004_COLOR_16_16_16_16_FLOAT, 1), | ||
141 | |||
142 | FMT_96_BIT(V_038004_FMT_32_32_32), | ||
143 | FMT_96_BIT(V_038004_FMT_32_32_32_FLOAT), | ||
144 | |||
145 | /* 128-bit */ | ||
146 | FMT_128_BIT(V_038004_COLOR_32_32_32_32, 1), | ||
147 | FMT_128_BIT(V_038004_COLOR_32_32_32_32_FLOAT, 1), | ||
148 | |||
149 | [V_038004_FMT_GB_GR] = { 2, 1, 4, 0 }, | ||
150 | [V_038004_FMT_BG_RG] = { 2, 1, 4, 0 }, | ||
151 | |||
152 | /* block compressed formats */ | ||
153 | [V_038004_FMT_BC1] = { 4, 4, 8, 0 }, | ||
154 | [V_038004_FMT_BC2] = { 4, 4, 16, 0 }, | ||
155 | [V_038004_FMT_BC3] = { 4, 4, 16, 0 }, | ||
156 | [V_038004_FMT_BC4] = { 4, 4, 8, 0 }, | ||
157 | [V_038004_FMT_BC5] = { 4, 4, 16, 0}, | ||
158 | [V_038004_FMT_BC6] = { 4, 4, 16, 0, CHIP_CEDAR}, /* Evergreen-only */ | ||
159 | [V_038004_FMT_BC7] = { 4, 4, 16, 0, CHIP_CEDAR}, /* Evergreen-only */ | ||
160 | |||
161 | /* The other Evergreen formats */ | ||
162 | [V_038004_FMT_32_AS_32_32_32_32] = { 1, 1, 4, 0, CHIP_CEDAR}, | ||
163 | }; | ||
164 | |||
165 | static inline bool fmt_is_valid_color(u32 format) | ||
166 | { | ||
167 | if (format >= ARRAY_SIZE(color_formats_table)) | ||
168 | return false; | ||
169 | |||
170 | if (color_formats_table[format].valid_color) | ||
171 | return true; | ||
172 | |||
173 | return false; | ||
174 | } | ||
175 | |||
176 | static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family) | ||
177 | { | ||
178 | if (format >= ARRAY_SIZE(color_formats_table)) | ||
179 | return false; | ||
180 | |||
181 | if (family < color_formats_table[format].min_family) | ||
182 | return false; | ||
183 | |||
184 | if (color_formats_table[format].blockwidth > 0) | ||
185 | return true; | ||
186 | |||
187 | return false; | ||
188 | } | ||
189 | |||
190 | static inline int fmt_get_blocksize(u32 format) | ||
191 | { | ||
192 | if (format >= ARRAY_SIZE(color_formats_table)) | ||
193 | return 0; | ||
194 | |||
195 | return color_formats_table[format].blocksize; | ||
196 | } | ||
197 | |||
198 | static inline int fmt_get_nblocksx(u32 format, u32 w) | ||
199 | { | ||
200 | unsigned bw; | ||
201 | |||
202 | if (format >= ARRAY_SIZE(color_formats_table)) | ||
203 | return 0; | ||
204 | |||
205 | bw = color_formats_table[format].blockwidth; | ||
206 | if (bw == 0) | ||
207 | return 0; | ||
208 | |||
209 | return (w + bw - 1) / bw; | ||
210 | } | ||
211 | |||
212 | static inline int fmt_get_nblocksy(u32 format, u32 h) | ||
213 | { | ||
214 | unsigned bh; | ||
215 | |||
216 | if (format >= ARRAY_SIZE(color_formats_table)) | ||
217 | return 0; | ||
218 | |||
219 | bh = color_formats_table[format].blockheight; | ||
220 | if (bh == 0) | ||
221 | return 0; | ||
222 | |||
223 | return (h + bh - 1) / bh; | ||
224 | } | ||
225 | |||
72 | static inline int r600_bpe_from_format(u32 *bpe, u32 format) | 226 | static inline int r600_bpe_from_format(u32 *bpe, u32 format) |
73 | { | 227 | { |
74 | switch (format) { | 228 | unsigned res; |
75 | case V_038004_COLOR_8: | 229 | |
76 | case V_038004_COLOR_4_4: | 230 | if (format >= ARRAY_SIZE(color_formats_table)) |
77 | case V_038004_COLOR_3_3_2: | 231 | goto fail; |
78 | case V_038004_FMT_1: | 232 | |
79 | *bpe = 1; | 233 | res = color_formats_table[format].blocksize; |
80 | break; | 234 | if (res == 0) |
81 | case V_038004_COLOR_16: | 235 | goto fail; |
82 | case V_038004_COLOR_16_FLOAT: | 236 | |
83 | case V_038004_COLOR_8_8: | 237 | *bpe = res; |
84 | case V_038004_COLOR_5_6_5: | 238 | return 0; |
85 | case V_038004_COLOR_6_5_5: | 239 | |
86 | case V_038004_COLOR_1_5_5_5: | 240 | fail: |
87 | case V_038004_COLOR_4_4_4_4: | 241 | *bpe = 16; |
88 | case V_038004_COLOR_5_5_5_1: | 242 | return -EINVAL; |
89 | *bpe = 2; | 243 | } |
90 | break; | 244 | |
91 | case V_038004_FMT_8_8_8: | 245 | struct array_mode_checker { |
92 | *bpe = 3; | 246 | int array_mode; |
93 | break; | 247 | u32 group_size; |
94 | case V_038004_COLOR_32: | 248 | u32 nbanks; |
95 | case V_038004_COLOR_32_FLOAT: | 249 | u32 npipes; |
96 | case V_038004_COLOR_16_16: | 250 | u32 nsamples; |
97 | case V_038004_COLOR_16_16_FLOAT: | 251 | u32 blocksize; |
98 | case V_038004_COLOR_8_24: | 252 | }; |
99 | case V_038004_COLOR_8_24_FLOAT: | 253 | |
100 | case V_038004_COLOR_24_8: | 254 | /* returns alignment in pixels for pitch/height/depth and bytes for base */ |
101 | case V_038004_COLOR_24_8_FLOAT: | 255 | static inline int r600_get_array_mode_alignment(struct array_mode_checker *values, |
102 | case V_038004_COLOR_10_11_11: | 256 | u32 *pitch_align, |
103 | case V_038004_COLOR_10_11_11_FLOAT: | 257 | u32 *height_align, |
104 | case V_038004_COLOR_11_11_10: | 258 | u32 *depth_align, |
105 | case V_038004_COLOR_11_11_10_FLOAT: | 259 | u64 *base_align) |
106 | case V_038004_COLOR_2_10_10_10: | 260 | { |
107 | case V_038004_COLOR_8_8_8_8: | 261 | u32 tile_width = 8; |
108 | case V_038004_COLOR_10_10_10_2: | 262 | u32 tile_height = 8; |
109 | case V_038004_FMT_5_9_9_9_SHAREDEXP: | 263 | u32 macro_tile_width = values->nbanks; |
110 | case V_038004_FMT_32_AS_8: | 264 | u32 macro_tile_height = values->npipes; |
111 | case V_038004_FMT_32_AS_8_8: | 265 | u32 tile_bytes = tile_width * tile_height * values->blocksize * values->nsamples; |
112 | *bpe = 4; | 266 | u32 macro_tile_bytes = macro_tile_width * macro_tile_height * tile_bytes; |
113 | break; | 267 | |
114 | case V_038004_COLOR_X24_8_32_FLOAT: | 268 | switch (values->array_mode) { |
115 | case V_038004_COLOR_32_32: | 269 | case ARRAY_LINEAR_GENERAL: |
116 | case V_038004_COLOR_32_32_FLOAT: | 270 | /* technically tile_width/_height for pitch/height */ |
117 | case V_038004_COLOR_16_16_16_16: | 271 | *pitch_align = 1; /* tile_width */ |
118 | case V_038004_COLOR_16_16_16_16_FLOAT: | 272 | *height_align = 1; /* tile_height */ |
119 | *bpe = 8; | 273 | *depth_align = 1; |
274 | *base_align = 1; | ||
120 | break; | 275 | break; |
121 | case V_038004_FMT_16_16_16: | 276 | case ARRAY_LINEAR_ALIGNED: |
122 | case V_038004_FMT_16_16_16_FLOAT: | 277 | *pitch_align = max((u32)64, (u32)(values->group_size / values->blocksize)); |
123 | *bpe = 6; | 278 | *height_align = tile_height; |
279 | *depth_align = 1; | ||
280 | *base_align = values->group_size; | ||
124 | break; | 281 | break; |
125 | case V_038004_FMT_32_32_32: | 282 | case ARRAY_1D_TILED_THIN1: |
126 | case V_038004_FMT_32_32_32_FLOAT: | 283 | *pitch_align = max((u32)tile_width, |
127 | *bpe = 12; | 284 | (u32)(values->group_size / |
285 | (tile_height * values->blocksize * values->nsamples))); | ||
286 | *height_align = tile_height; | ||
287 | *depth_align = 1; | ||
288 | *base_align = values->group_size; | ||
128 | break; | 289 | break; |
129 | case V_038004_COLOR_32_32_32_32: | 290 | case ARRAY_2D_TILED_THIN1: |
130 | case V_038004_COLOR_32_32_32_32_FLOAT: | 291 | *pitch_align = max((u32)macro_tile_width, |
131 | *bpe = 16; | 292 | (u32)(((values->group_size / tile_height) / |
293 | (values->blocksize * values->nsamples)) * | ||
294 | values->nbanks)) * tile_width; | ||
295 | *height_align = macro_tile_height * tile_height; | ||
296 | *depth_align = 1; | ||
297 | *base_align = max(macro_tile_bytes, | ||
298 | (*pitch_align) * values->blocksize * (*height_align) * values->nsamples); | ||
132 | break; | 299 | break; |
133 | case V_038004_FMT_GB_GR: | ||
134 | case V_038004_FMT_BG_RG: | ||
135 | case V_038004_COLOR_INVALID: | ||
136 | default: | 300 | default: |
137 | *bpe = 16; | ||
138 | return -EINVAL; | 301 | return -EINVAL; |
139 | } | 302 | } |
303 | |||
140 | return 0; | 304 | return 0; |
141 | } | 305 | } |
142 | 306 | ||
@@ -153,10 +317,12 @@ static void r600_cs_track_init(struct r600_cs_track *track) | |||
153 | track->cb_color_info[i] = 0; | 317 | track->cb_color_info[i] = 0; |
154 | track->cb_color_bo[i] = NULL; | 318 | track->cb_color_bo[i] = NULL; |
155 | track->cb_color_bo_offset[i] = 0xFFFFFFFF; | 319 | track->cb_color_bo_offset[i] = 0xFFFFFFFF; |
320 | track->cb_color_bo_mc[i] = 0xFFFFFFFF; | ||
156 | } | 321 | } |
157 | track->cb_target_mask = 0xFFFFFFFF; | 322 | track->cb_target_mask = 0xFFFFFFFF; |
158 | track->cb_shader_mask = 0xFFFFFFFF; | 323 | track->cb_shader_mask = 0xFFFFFFFF; |
159 | track->db_bo = NULL; | 324 | track->db_bo = NULL; |
325 | track->db_bo_mc = 0xFFFFFFFF; | ||
160 | /* assume the biggest format and that htile is enabled */ | 326 | /* assume the biggest format and that htile is enabled */ |
161 | track->db_depth_info = 7 | (1 << 25); | 327 | track->db_depth_info = 7 | (1 << 25); |
162 | track->db_depth_view = 0xFFFFC000; | 328 | track->db_depth_view = 0xFFFFC000; |
@@ -168,71 +334,59 @@ static void r600_cs_track_init(struct r600_cs_track *track) | |||
168 | static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) | 334 | static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) |
169 | { | 335 | { |
170 | struct r600_cs_track *track = p->track; | 336 | struct r600_cs_track *track = p->track; |
171 | u32 bpe = 0, pitch, slice_tile_max, size, tmp, height, pitch_align; | 337 | u32 slice_tile_max, size, tmp; |
338 | u32 height, height_align, pitch, pitch_align, depth_align; | ||
339 | u64 base_offset, base_align; | ||
340 | struct array_mode_checker array_check; | ||
172 | volatile u32 *ib = p->ib->ptr; | 341 | volatile u32 *ib = p->ib->ptr; |
173 | 342 | unsigned array_mode; | |
343 | u32 format; | ||
174 | if (G_0280A0_TILE_MODE(track->cb_color_info[i])) { | 344 | if (G_0280A0_TILE_MODE(track->cb_color_info[i])) { |
175 | dev_warn(p->dev, "FMASK or CMASK buffer are not supported by this kernel\n"); | 345 | dev_warn(p->dev, "FMASK or CMASK buffer are not supported by this kernel\n"); |
176 | return -EINVAL; | 346 | return -EINVAL; |
177 | } | 347 | } |
178 | size = radeon_bo_size(track->cb_color_bo[i]) - track->cb_color_bo_offset[i]; | 348 | size = radeon_bo_size(track->cb_color_bo[i]) - track->cb_color_bo_offset[i]; |
179 | if (r600_bpe_from_format(&bpe, G_0280A0_FORMAT(track->cb_color_info[i]))) { | 349 | format = G_0280A0_FORMAT(track->cb_color_info[i]); |
350 | if (!fmt_is_valid_color(format)) { | ||
180 | dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08X)\n", | 351 | dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08X)\n", |
181 | __func__, __LINE__, G_0280A0_FORMAT(track->cb_color_info[i]), | 352 | __func__, __LINE__, format, |
182 | i, track->cb_color_info[i]); | 353 | i, track->cb_color_info[i]); |
183 | return -EINVAL; | 354 | return -EINVAL; |
184 | } | 355 | } |
185 | /* pitch is the number of 8x8 tiles per row */ | 356 | /* pitch in pixels */ |
186 | pitch = G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1; | 357 | pitch = (G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1) * 8; |
187 | slice_tile_max = G_028060_SLICE_TILE_MAX(track->cb_color_size[i]) + 1; | 358 | slice_tile_max = G_028060_SLICE_TILE_MAX(track->cb_color_size[i]) + 1; |
188 | height = size / (pitch * 8 * bpe); | 359 | slice_tile_max *= 64; |
360 | height = slice_tile_max / pitch; | ||
189 | if (height > 8192) | 361 | if (height > 8192) |
190 | height = 8192; | 362 | height = 8192; |
191 | if (height > 7) | 363 | array_mode = G_0280A0_ARRAY_MODE(track->cb_color_info[i]); |
192 | height &= ~0x7; | 364 | |
193 | switch (G_0280A0_ARRAY_MODE(track->cb_color_info[i])) { | 365 | base_offset = track->cb_color_bo_mc[i] + track->cb_color_bo_offset[i]; |
366 | array_check.array_mode = array_mode; | ||
367 | array_check.group_size = track->group_size; | ||
368 | array_check.nbanks = track->nbanks; | ||
369 | array_check.npipes = track->npipes; | ||
370 | array_check.nsamples = track->nsamples; | ||
371 | array_check.blocksize = fmt_get_blocksize(format); | ||
372 | if (r600_get_array_mode_alignment(&array_check, | ||
373 | &pitch_align, &height_align, &depth_align, &base_align)) { | ||
374 | dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__, | ||
375 | G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i, | ||
376 | track->cb_color_info[i]); | ||
377 | return -EINVAL; | ||
378 | } | ||
379 | switch (array_mode) { | ||
194 | case V_0280A0_ARRAY_LINEAR_GENERAL: | 380 | case V_0280A0_ARRAY_LINEAR_GENERAL: |
195 | /* technically height & 0x7 */ | ||
196 | break; | 381 | break; |
197 | case V_0280A0_ARRAY_LINEAR_ALIGNED: | 382 | case V_0280A0_ARRAY_LINEAR_ALIGNED: |
198 | pitch_align = max((u32)64, (u32)(track->group_size / bpe)) / 8; | ||
199 | if (!IS_ALIGNED(pitch, pitch_align)) { | ||
200 | dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n", | ||
201 | __func__, __LINE__, pitch); | ||
202 | return -EINVAL; | ||
203 | } | ||
204 | if (!IS_ALIGNED(height, 8)) { | ||
205 | dev_warn(p->dev, "%s:%d cb height (%d) invalid\n", | ||
206 | __func__, __LINE__, height); | ||
207 | return -EINVAL; | ||
208 | } | ||
209 | break; | 383 | break; |
210 | case V_0280A0_ARRAY_1D_TILED_THIN1: | 384 | case V_0280A0_ARRAY_1D_TILED_THIN1: |
211 | pitch_align = max((u32)8, (u32)(track->group_size / (8 * bpe * track->nsamples))) / 8; | 385 | /* avoid breaking userspace */ |
212 | if (!IS_ALIGNED(pitch, pitch_align)) { | 386 | if (height > 7) |
213 | dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n", | 387 | height &= ~0x7; |
214 | __func__, __LINE__, pitch); | ||
215 | return -EINVAL; | ||
216 | } | ||
217 | if (!IS_ALIGNED(height, 8)) { | ||
218 | dev_warn(p->dev, "%s:%d cb height (%d) invalid\n", | ||
219 | __func__, __LINE__, height); | ||
220 | return -EINVAL; | ||
221 | } | ||
222 | break; | 388 | break; |
223 | case V_0280A0_ARRAY_2D_TILED_THIN1: | 389 | case V_0280A0_ARRAY_2D_TILED_THIN1: |
224 | pitch_align = max((u32)track->nbanks, | ||
225 | (u32)(((track->group_size / 8) / (bpe * track->nsamples)) * track->nbanks)); | ||
226 | if (!IS_ALIGNED(pitch, pitch_align)) { | ||
227 | dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n", | ||
228 | __func__, __LINE__, pitch); | ||
229 | return -EINVAL; | ||
230 | } | ||
231 | if (!IS_ALIGNED((height / 8), track->nbanks)) { | ||
232 | dev_warn(p->dev, "%s:%d cb height (%d) invalid\n", | ||
233 | __func__, __LINE__, height); | ||
234 | return -EINVAL; | ||
235 | } | ||
236 | break; | 390 | break; |
237 | default: | 391 | default: |
238 | dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__, | 392 | dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__, |
@@ -240,21 +394,46 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) | |||
240 | track->cb_color_info[i]); | 394 | track->cb_color_info[i]); |
241 | return -EINVAL; | 395 | return -EINVAL; |
242 | } | 396 | } |
243 | /* check offset */ | 397 | |
244 | tmp = height * pitch * 8 * bpe; | 398 | if (!IS_ALIGNED(pitch, pitch_align)) { |
245 | if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) { | 399 | dev_warn(p->dev, "%s:%d cb pitch (%d, 0x%x, %d) invalid\n", |
246 | dev_warn(p->dev, "%s offset[%d] %d too big\n", __func__, i, track->cb_color_bo_offset[i]); | 400 | __func__, __LINE__, pitch, pitch_align, array_mode); |
247 | return -EINVAL; | 401 | return -EINVAL; |
248 | } | 402 | } |
249 | if (!IS_ALIGNED(track->cb_color_bo_offset[i], track->group_size)) { | 403 | if (!IS_ALIGNED(height, height_align)) { |
250 | dev_warn(p->dev, "%s offset[%d] %d not aligned\n", __func__, i, track->cb_color_bo_offset[i]); | 404 | dev_warn(p->dev, "%s:%d cb height (%d, 0x%x, %d) invalid\n", |
405 | __func__, __LINE__, height, height_align, array_mode); | ||
251 | return -EINVAL; | 406 | return -EINVAL; |
252 | } | 407 | } |
408 | if (!IS_ALIGNED(base_offset, base_align)) { | ||
409 | dev_warn(p->dev, "%s offset[%d] 0x%llx 0x%llx, %d not aligned\n", __func__, i, | ||
410 | base_offset, base_align, array_mode); | ||
411 | return -EINVAL; | ||
412 | } | ||
413 | |||
414 | /* check offset */ | ||
415 | tmp = fmt_get_nblocksy(format, height) * fmt_get_nblocksx(format, pitch) * fmt_get_blocksize(format); | ||
416 | if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) { | ||
417 | if (array_mode == V_0280A0_ARRAY_LINEAR_GENERAL) { | ||
418 | /* the initial DDX does bad things with the CB size occasionally */ | ||
419 | /* it rounds up height too far for slice tile max but the BO is smaller */ | ||
420 | /* r600c,g also seem to flush at bad times in some apps resulting in | ||
421 | * bogus values here. So for linear just allow anything to avoid breaking | ||
422 | * broken userspace. | ||
423 | */ | ||
424 | } else { | ||
425 | dev_warn(p->dev, "%s offset[%d] %d %d %d %lu too big\n", __func__, i, | ||
426 | array_mode, | ||
427 | track->cb_color_bo_offset[i], tmp, | ||
428 | radeon_bo_size(track->cb_color_bo[i])); | ||
429 | return -EINVAL; | ||
430 | } | ||
431 | } | ||
253 | /* limit max tile */ | 432 | /* limit max tile */ |
254 | tmp = (height * pitch * 8) >> 6; | 433 | tmp = (height * pitch) >> 6; |
255 | if (tmp < slice_tile_max) | 434 | if (tmp < slice_tile_max) |
256 | slice_tile_max = tmp; | 435 | slice_tile_max = tmp; |
257 | tmp = S_028060_PITCH_TILE_MAX(pitch - 1) | | 436 | tmp = S_028060_PITCH_TILE_MAX((pitch / 8) - 1) | |
258 | S_028060_SLICE_TILE_MAX(slice_tile_max - 1); | 437 | S_028060_SLICE_TILE_MAX(slice_tile_max - 1); |
259 | ib[track->cb_color_size_idx[i]] = tmp; | 438 | ib[track->cb_color_size_idx[i]] = tmp; |
260 | return 0; | 439 | return 0; |
@@ -296,7 +475,12 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) | |||
296 | /* Check depth buffer */ | 475 | /* Check depth buffer */ |
297 | if (G_028800_STENCIL_ENABLE(track->db_depth_control) || | 476 | if (G_028800_STENCIL_ENABLE(track->db_depth_control) || |
298 | G_028800_Z_ENABLE(track->db_depth_control)) { | 477 | G_028800_Z_ENABLE(track->db_depth_control)) { |
299 | u32 nviews, bpe, ntiles, pitch, pitch_align, height, size; | 478 | u32 nviews, bpe, ntiles, size, slice_tile_max; |
479 | u32 height, height_align, pitch, pitch_align, depth_align; | ||
480 | u64 base_offset, base_align; | ||
481 | struct array_mode_checker array_check; | ||
482 | int array_mode; | ||
483 | |||
300 | if (track->db_bo == NULL) { | 484 | if (track->db_bo == NULL) { |
301 | dev_warn(p->dev, "z/stencil with no depth buffer\n"); | 485 | dev_warn(p->dev, "z/stencil with no depth buffer\n"); |
302 | return -EINVAL; | 486 | return -EINVAL; |
@@ -339,39 +523,34 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) | |||
339 | ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF); | 523 | ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF); |
340 | } else { | 524 | } else { |
341 | size = radeon_bo_size(track->db_bo); | 525 | size = radeon_bo_size(track->db_bo); |
342 | pitch = G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1; | 526 | /* pitch in pixels */ |
343 | height = size / (pitch * 8 * bpe); | 527 | pitch = (G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1) * 8; |
344 | height &= ~0x7; | 528 | slice_tile_max = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; |
345 | if (!height) | 529 | slice_tile_max *= 64; |
346 | height = 8; | 530 | height = slice_tile_max / pitch; |
347 | 531 | if (height > 8192) | |
348 | switch (G_028010_ARRAY_MODE(track->db_depth_info)) { | 532 | height = 8192; |
533 | base_offset = track->db_bo_mc + track->db_offset; | ||
534 | array_mode = G_028010_ARRAY_MODE(track->db_depth_info); | ||
535 | array_check.array_mode = array_mode; | ||
536 | array_check.group_size = track->group_size; | ||
537 | array_check.nbanks = track->nbanks; | ||
538 | array_check.npipes = track->npipes; | ||
539 | array_check.nsamples = track->nsamples; | ||
540 | array_check.blocksize = bpe; | ||
541 | if (r600_get_array_mode_alignment(&array_check, | ||
542 | &pitch_align, &height_align, &depth_align, &base_align)) { | ||
543 | dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, | ||
544 | G_028010_ARRAY_MODE(track->db_depth_info), | ||
545 | track->db_depth_info); | ||
546 | return -EINVAL; | ||
547 | } | ||
548 | switch (array_mode) { | ||
349 | case V_028010_ARRAY_1D_TILED_THIN1: | 549 | case V_028010_ARRAY_1D_TILED_THIN1: |
350 | pitch_align = (max((u32)8, (u32)(track->group_size / (8 * bpe))) / 8); | 550 | /* don't break userspace */ |
351 | if (!IS_ALIGNED(pitch, pitch_align)) { | 551 | height &= ~0x7; |
352 | dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n", | ||
353 | __func__, __LINE__, pitch); | ||
354 | return -EINVAL; | ||
355 | } | ||
356 | if (!IS_ALIGNED(height, 8)) { | ||
357 | dev_warn(p->dev, "%s:%d db height (%d) invalid\n", | ||
358 | __func__, __LINE__, height); | ||
359 | return -EINVAL; | ||
360 | } | ||
361 | break; | 552 | break; |
362 | case V_028010_ARRAY_2D_TILED_THIN1: | 553 | case V_028010_ARRAY_2D_TILED_THIN1: |
363 | pitch_align = max((u32)track->nbanks, | ||
364 | (u32)(((track->group_size / 8) / bpe) * track->nbanks)); | ||
365 | if (!IS_ALIGNED(pitch, pitch_align)) { | ||
366 | dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n", | ||
367 | __func__, __LINE__, pitch); | ||
368 | return -EINVAL; | ||
369 | } | ||
370 | if ((height / 8) & (track->nbanks - 1)) { | ||
371 | dev_warn(p->dev, "%s:%d db height (%d) invalid\n", | ||
372 | __func__, __LINE__, height); | ||
373 | return -EINVAL; | ||
374 | } | ||
375 | break; | 554 | break; |
376 | default: | 555 | default: |
377 | dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, | 556 | dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, |
@@ -379,17 +558,31 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) | |||
379 | track->db_depth_info); | 558 | track->db_depth_info); |
380 | return -EINVAL; | 559 | return -EINVAL; |
381 | } | 560 | } |
382 | if (!IS_ALIGNED(track->db_offset, track->group_size)) { | 561 | |
383 | dev_warn(p->dev, "%s offset[%d] %d not aligned\n", __func__, i, track->db_offset); | 562 | if (!IS_ALIGNED(pitch, pitch_align)) { |
563 | dev_warn(p->dev, "%s:%d db pitch (%d, 0x%x, %d) invalid\n", | ||
564 | __func__, __LINE__, pitch, pitch_align, array_mode); | ||
384 | return -EINVAL; | 565 | return -EINVAL; |
385 | } | 566 | } |
567 | if (!IS_ALIGNED(height, height_align)) { | ||
568 | dev_warn(p->dev, "%s:%d db height (%d, 0x%x, %d) invalid\n", | ||
569 | __func__, __LINE__, height, height_align, array_mode); | ||
570 | return -EINVAL; | ||
571 | } | ||
572 | if (!IS_ALIGNED(base_offset, base_align)) { | ||
573 | dev_warn(p->dev, "%s offset[%d] 0x%llx, 0x%llx, %d not aligned\n", __func__, i, | ||
574 | base_offset, base_align, array_mode); | ||
575 | return -EINVAL; | ||
576 | } | ||
577 | |||
386 | ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; | 578 | ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; |
387 | nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1; | 579 | nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1; |
388 | tmp = ntiles * bpe * 64 * nviews; | 580 | tmp = ntiles * bpe * 64 * nviews; |
389 | if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) { | 581 | if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) { |
390 | dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %d -> %d have %ld)\n", | 582 | dev_warn(p->dev, "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n", |
391 | track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset, | 583 | array_mode, |
392 | radeon_bo_size(track->db_bo)); | 584 | track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset, |
585 | radeon_bo_size(track->db_bo)); | ||
393 | return -EINVAL; | 586 | return -EINVAL; |
394 | } | 587 | } |
395 | } | 588 | } |
@@ -595,33 +788,28 @@ static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p) | |||
595 | if (wait_reg_mem.type != PACKET_TYPE3 || | 788 | if (wait_reg_mem.type != PACKET_TYPE3 || |
596 | wait_reg_mem.opcode != PACKET3_WAIT_REG_MEM) { | 789 | wait_reg_mem.opcode != PACKET3_WAIT_REG_MEM) { |
597 | DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n"); | 790 | DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n"); |
598 | r = -EINVAL; | 791 | return -EINVAL; |
599 | return r; | ||
600 | } | 792 | } |
601 | 793 | ||
602 | wait_reg_mem_info = radeon_get_ib_value(p, wait_reg_mem.idx + 1); | 794 | wait_reg_mem_info = radeon_get_ib_value(p, wait_reg_mem.idx + 1); |
603 | /* bit 4 is reg (0) or mem (1) */ | 795 | /* bit 4 is reg (0) or mem (1) */ |
604 | if (wait_reg_mem_info & 0x10) { | 796 | if (wait_reg_mem_info & 0x10) { |
605 | DRM_ERROR("vline WAIT_REG_MEM waiting on MEM rather than REG\n"); | 797 | DRM_ERROR("vline WAIT_REG_MEM waiting on MEM rather than REG\n"); |
606 | r = -EINVAL; | 798 | return -EINVAL; |
607 | return r; | ||
608 | } | 799 | } |
609 | /* waiting for value to be equal */ | 800 | /* waiting for value to be equal */ |
610 | if ((wait_reg_mem_info & 0x7) != 0x3) { | 801 | if ((wait_reg_mem_info & 0x7) != 0x3) { |
611 | DRM_ERROR("vline WAIT_REG_MEM function not equal\n"); | 802 | DRM_ERROR("vline WAIT_REG_MEM function not equal\n"); |
612 | r = -EINVAL; | 803 | return -EINVAL; |
613 | return r; | ||
614 | } | 804 | } |
615 | if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != AVIVO_D1MODE_VLINE_STATUS) { | 805 | if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != AVIVO_D1MODE_VLINE_STATUS) { |
616 | DRM_ERROR("vline WAIT_REG_MEM bad reg\n"); | 806 | DRM_ERROR("vline WAIT_REG_MEM bad reg\n"); |
617 | r = -EINVAL; | 807 | return -EINVAL; |
618 | return r; | ||
619 | } | 808 | } |
620 | 809 | ||
621 | if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != AVIVO_D1MODE_VLINE_STAT) { | 810 | if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != AVIVO_D1MODE_VLINE_STAT) { |
622 | DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n"); | 811 | DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n"); |
623 | r = -EINVAL; | 812 | return -EINVAL; |
624 | return r; | ||
625 | } | 813 | } |
626 | 814 | ||
627 | /* jump over the NOP */ | 815 | /* jump over the NOP */ |
@@ -640,8 +828,7 @@ static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p) | |||
640 | obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC); | 828 | obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC); |
641 | if (!obj) { | 829 | if (!obj) { |
642 | DRM_ERROR("cannot find crtc %d\n", crtc_id); | 830 | DRM_ERROR("cannot find crtc %d\n", crtc_id); |
643 | r = -EINVAL; | 831 | return -EINVAL; |
644 | goto out; | ||
645 | } | 832 | } |
646 | crtc = obj_to_crtc(obj); | 833 | crtc = obj_to_crtc(obj); |
647 | radeon_crtc = to_radeon_crtc(crtc); | 834 | radeon_crtc = to_radeon_crtc(crtc); |
@@ -664,14 +851,13 @@ static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p) | |||
664 | break; | 851 | break; |
665 | default: | 852 | default: |
666 | DRM_ERROR("unknown crtc reloc\n"); | 853 | DRM_ERROR("unknown crtc reloc\n"); |
667 | r = -EINVAL; | 854 | return -EINVAL; |
668 | goto out; | ||
669 | } | 855 | } |
670 | ib[h_idx] = header; | 856 | ib[h_idx] = header; |
671 | ib[h_idx + 4] = AVIVO_D2MODE_VLINE_STATUS >> 2; | 857 | ib[h_idx + 4] = AVIVO_D2MODE_VLINE_STATUS >> 2; |
672 | } | 858 | } |
673 | out: | 859 | |
674 | return r; | 860 | return 0; |
675 | } | 861 | } |
676 | 862 | ||
677 | static int r600_packet0_check(struct radeon_cs_parser *p, | 863 | static int r600_packet0_check(struct radeon_cs_parser *p, |
@@ -743,7 +929,7 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx | |||
743 | return 0; | 929 | return 0; |
744 | ib = p->ib->ptr; | 930 | ib = p->ib->ptr; |
745 | switch (reg) { | 931 | switch (reg) { |
746 | /* force following reg to 0 in an attemp to disable out buffer | 932 | /* force following reg to 0 in an attempt to disable out buffer |
747 | * which will need us to better understand how it works to perform | 933 | * which will need us to better understand how it works to perform |
748 | * security check on it (Jerome) | 934 | * security check on it (Jerome) |
749 | */ | 935 | */ |
@@ -938,6 +1124,7 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx | |||
938 | ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); | 1124 | ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); |
939 | track->cb_color_base_last[tmp] = ib[idx]; | 1125 | track->cb_color_base_last[tmp] = ib[idx]; |
940 | track->cb_color_bo[tmp] = reloc->robj; | 1126 | track->cb_color_bo[tmp] = reloc->robj; |
1127 | track->cb_color_bo_mc[tmp] = reloc->lobj.gpu_offset; | ||
941 | break; | 1128 | break; |
942 | case DB_DEPTH_BASE: | 1129 | case DB_DEPTH_BASE: |
943 | r = r600_cs_packet_next_reloc(p, &reloc); | 1130 | r = r600_cs_packet_next_reloc(p, &reloc); |
@@ -949,6 +1136,7 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx | |||
949 | track->db_offset = radeon_get_ib_value(p, idx) << 8; | 1136 | track->db_offset = radeon_get_ib_value(p, idx) << 8; |
950 | ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); | 1137 | ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); |
951 | track->db_bo = reloc->robj; | 1138 | track->db_bo = reloc->robj; |
1139 | track->db_bo_mc = reloc->lobj.gpu_offset; | ||
952 | break; | 1140 | break; |
953 | case DB_HTILE_DATA_BASE: | 1141 | case DB_HTILE_DATA_BASE: |
954 | case SQ_PGM_START_FS: | 1142 | case SQ_PGM_START_FS: |
@@ -1019,39 +1207,61 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx | |||
1019 | return 0; | 1207 | return 0; |
1020 | } | 1208 | } |
1021 | 1209 | ||
1022 | static inline unsigned minify(unsigned size, unsigned levels) | 1210 | static inline unsigned mip_minify(unsigned size, unsigned level) |
1023 | { | 1211 | { |
1024 | size = size >> levels; | 1212 | unsigned val; |
1025 | if (size < 1) | 1213 | |
1026 | size = 1; | 1214 | val = max(1U, size >> level); |
1027 | return size; | 1215 | if (level > 0) |
1216 | val = roundup_pow_of_two(val); | ||
1217 | return val; | ||
1028 | } | 1218 | } |
1029 | 1219 | ||
1030 | static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned nlevels, | 1220 | static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel, |
1031 | unsigned w0, unsigned h0, unsigned d0, unsigned bpe, | 1221 | unsigned w0, unsigned h0, unsigned d0, unsigned format, |
1032 | unsigned pitch_align, | 1222 | unsigned block_align, unsigned height_align, unsigned base_align, |
1033 | unsigned *l0_size, unsigned *mipmap_size) | 1223 | unsigned *l0_size, unsigned *mipmap_size) |
1034 | { | 1224 | { |
1035 | unsigned offset, i, level, face; | 1225 | unsigned offset, i, level; |
1036 | unsigned width, height, depth, rowstride, size; | 1226 | unsigned width, height, depth, size; |
1227 | unsigned blocksize; | ||
1228 | unsigned nbx, nby; | ||
1229 | unsigned nlevels = llevel - blevel + 1; | ||
1037 | 1230 | ||
1038 | w0 = minify(w0, 0); | 1231 | *l0_size = -1; |
1039 | h0 = minify(h0, 0); | 1232 | blocksize = fmt_get_blocksize(format); |
1040 | d0 = minify(d0, 0); | 1233 | |
1234 | w0 = mip_minify(w0, 0); | ||
1235 | h0 = mip_minify(h0, 0); | ||
1236 | d0 = mip_minify(d0, 0); | ||
1041 | for(i = 0, offset = 0, level = blevel; i < nlevels; i++, level++) { | 1237 | for(i = 0, offset = 0, level = blevel; i < nlevels; i++, level++) { |
1042 | width = minify(w0, i); | 1238 | width = mip_minify(w0, i); |
1043 | height = minify(h0, i); | 1239 | nbx = fmt_get_nblocksx(format, width); |
1044 | depth = minify(d0, i); | 1240 | |
1045 | for(face = 0; face < nfaces; face++) { | 1241 | nbx = round_up(nbx, block_align); |
1046 | rowstride = ALIGN((width * bpe), pitch_align); | 1242 | |
1047 | size = height * rowstride * depth; | 1243 | height = mip_minify(h0, i); |
1048 | offset += size; | 1244 | nby = fmt_get_nblocksy(format, height); |
1049 | offset = (offset + 0x1f) & ~0x1f; | 1245 | nby = round_up(nby, height_align); |
1050 | } | 1246 | |
1247 | depth = mip_minify(d0, i); | ||
1248 | |||
1249 | size = nbx * nby * blocksize; | ||
1250 | if (nfaces) | ||
1251 | size *= nfaces; | ||
1252 | else | ||
1253 | size *= depth; | ||
1254 | |||
1255 | if (i == 0) | ||
1256 | *l0_size = size; | ||
1257 | |||
1258 | if (i == 0 || i == 1) | ||
1259 | offset = round_up(offset, base_align); | ||
1260 | |||
1261 | offset += size; | ||
1051 | } | 1262 | } |
1052 | *l0_size = ALIGN((w0 * bpe), pitch_align) * h0 * d0; | ||
1053 | *mipmap_size = offset; | 1263 | *mipmap_size = offset; |
1054 | if (!nlevels) | 1264 | if (llevel == 0) |
1055 | *mipmap_size = *l0_size; | 1265 | *mipmap_size = *l0_size; |
1056 | if (!blevel) | 1266 | if (!blevel) |
1057 | *mipmap_size -= *l0_size; | 1267 | *mipmap_size -= *l0_size; |
@@ -1070,16 +1280,27 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned nlevels | |||
1070 | static inline int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, | 1280 | static inline int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, |
1071 | struct radeon_bo *texture, | 1281 | struct radeon_bo *texture, |
1072 | struct radeon_bo *mipmap, | 1282 | struct radeon_bo *mipmap, |
1283 | u64 base_offset, | ||
1284 | u64 mip_offset, | ||
1073 | u32 tiling_flags) | 1285 | u32 tiling_flags) |
1074 | { | 1286 | { |
1075 | struct r600_cs_track *track = p->track; | 1287 | struct r600_cs_track *track = p->track; |
1076 | u32 nfaces, nlevels, blevel, w0, h0, d0, bpe = 0; | 1288 | u32 nfaces, llevel, blevel, w0, h0, d0; |
1077 | u32 word0, word1, l0_size, mipmap_size, pitch, pitch_align; | 1289 | u32 word0, word1, l0_size, mipmap_size, word2, word3; |
1290 | u32 height_align, pitch, pitch_align, depth_align; | ||
1291 | u32 array, barray, larray; | ||
1292 | u64 base_align; | ||
1293 | struct array_mode_checker array_check; | ||
1294 | u32 format; | ||
1078 | 1295 | ||
1079 | /* on legacy kernel we don't perform advanced check */ | 1296 | /* on legacy kernel we don't perform advanced check */ |
1080 | if (p->rdev == NULL) | 1297 | if (p->rdev == NULL) |
1081 | return 0; | 1298 | return 0; |
1082 | 1299 | ||
1300 | /* convert to bytes */ | ||
1301 | base_offset <<= 8; | ||
1302 | mip_offset <<= 8; | ||
1303 | |||
1083 | word0 = radeon_get_ib_value(p, idx + 0); | 1304 | word0 = radeon_get_ib_value(p, idx + 0); |
1084 | if (tiling_flags & RADEON_TILING_MACRO) | 1305 | if (tiling_flags & RADEON_TILING_MACRO) |
1085 | word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); | 1306 | word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); |
@@ -1096,82 +1317,89 @@ static inline int r600_check_texture_resource(struct radeon_cs_parser *p, u32 i | |||
1096 | case V_038000_SQ_TEX_DIM_3D: | 1317 | case V_038000_SQ_TEX_DIM_3D: |
1097 | break; | 1318 | break; |
1098 | case V_038000_SQ_TEX_DIM_CUBEMAP: | 1319 | case V_038000_SQ_TEX_DIM_CUBEMAP: |
1099 | nfaces = 6; | 1320 | if (p->family >= CHIP_RV770) |
1321 | nfaces = 8; | ||
1322 | else | ||
1323 | nfaces = 6; | ||
1100 | break; | 1324 | break; |
1101 | case V_038000_SQ_TEX_DIM_1D_ARRAY: | 1325 | case V_038000_SQ_TEX_DIM_1D_ARRAY: |
1102 | case V_038000_SQ_TEX_DIM_2D_ARRAY: | 1326 | case V_038000_SQ_TEX_DIM_2D_ARRAY: |
1327 | array = 1; | ||
1328 | break; | ||
1103 | case V_038000_SQ_TEX_DIM_2D_MSAA: | 1329 | case V_038000_SQ_TEX_DIM_2D_MSAA: |
1104 | case V_038000_SQ_TEX_DIM_2D_ARRAY_MSAA: | 1330 | case V_038000_SQ_TEX_DIM_2D_ARRAY_MSAA: |
1105 | default: | 1331 | default: |
1106 | dev_warn(p->dev, "this kernel doesn't support %d texture dim\n", G_038000_DIM(word0)); | 1332 | dev_warn(p->dev, "this kernel doesn't support %d texture dim\n", G_038000_DIM(word0)); |
1107 | return -EINVAL; | 1333 | return -EINVAL; |
1108 | } | 1334 | } |
1109 | if (r600_bpe_from_format(&bpe, G_038004_DATA_FORMAT(word1))) { | 1335 | format = G_038004_DATA_FORMAT(word1); |
1336 | if (!fmt_is_valid_texture(format, p->family)) { | ||
1110 | dev_warn(p->dev, "%s:%d texture invalid format %d\n", | 1337 | dev_warn(p->dev, "%s:%d texture invalid format %d\n", |
1111 | __func__, __LINE__, G_038004_DATA_FORMAT(word1)); | 1338 | __func__, __LINE__, format); |
1112 | return -EINVAL; | 1339 | return -EINVAL; |
1113 | } | 1340 | } |
1114 | 1341 | ||
1115 | pitch = G_038000_PITCH(word0) + 1; | 1342 | /* pitch in texels */ |
1116 | switch (G_038000_TILE_MODE(word0)) { | 1343 | pitch = (G_038000_PITCH(word0) + 1) * 8; |
1117 | case V_038000_ARRAY_LINEAR_GENERAL: | 1344 | array_check.array_mode = G_038000_TILE_MODE(word0); |
1118 | pitch_align = 1; | 1345 | array_check.group_size = track->group_size; |
1119 | /* XXX check height align */ | 1346 | array_check.nbanks = track->nbanks; |
1120 | break; | 1347 | array_check.npipes = track->npipes; |
1121 | case V_038000_ARRAY_LINEAR_ALIGNED: | 1348 | array_check.nsamples = 1; |
1122 | pitch_align = max((u32)64, (u32)(track->group_size / bpe)) / 8; | 1349 | array_check.blocksize = fmt_get_blocksize(format); |
1123 | if (!IS_ALIGNED(pitch, pitch_align)) { | 1350 | if (r600_get_array_mode_alignment(&array_check, |
1124 | dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n", | 1351 | &pitch_align, &height_align, &depth_align, &base_align)) { |
1125 | __func__, __LINE__, pitch); | 1352 | dev_warn(p->dev, "%s:%d tex array mode (%d) invalid\n", |
1126 | return -EINVAL; | 1353 | __func__, __LINE__, G_038000_TILE_MODE(word0)); |
1127 | } | 1354 | return -EINVAL; |
1128 | /* XXX check height align */ | 1355 | } |
1129 | break; | 1356 | |
1130 | case V_038000_ARRAY_1D_TILED_THIN1: | 1357 | /* XXX check height as well... */ |
1131 | pitch_align = max((u32)8, (u32)(track->group_size / (8 * bpe))) / 8; | 1358 | |
1132 | if (!IS_ALIGNED(pitch, pitch_align)) { | 1359 | if (!IS_ALIGNED(pitch, pitch_align)) { |
1133 | dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n", | 1360 | dev_warn(p->dev, "%s:%d tex pitch (%d, 0x%x, %d) invalid\n", |
1134 | __func__, __LINE__, pitch); | 1361 | __func__, __LINE__, pitch, pitch_align, G_038000_TILE_MODE(word0)); |
1135 | return -EINVAL; | 1362 | return -EINVAL; |
1136 | } | 1363 | } |
1137 | /* XXX check height align */ | 1364 | if (!IS_ALIGNED(base_offset, base_align)) { |
1138 | break; | 1365 | dev_warn(p->dev, "%s:%d tex base offset (0x%llx, 0x%llx, %d) invalid\n", |
1139 | case V_038000_ARRAY_2D_TILED_THIN1: | 1366 | __func__, __LINE__, base_offset, base_align, G_038000_TILE_MODE(word0)); |
1140 | pitch_align = max((u32)track->nbanks, | 1367 | return -EINVAL; |
1141 | (u32)(((track->group_size / 8) / bpe) * track->nbanks)); | 1368 | } |
1142 | if (!IS_ALIGNED(pitch, pitch_align)) { | 1369 | if (!IS_ALIGNED(mip_offset, base_align)) { |
1143 | dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n", | 1370 | dev_warn(p->dev, "%s:%d tex mip offset (0x%llx, 0x%llx, %d) invalid\n", |
1144 | __func__, __LINE__, pitch); | 1371 | __func__, __LINE__, mip_offset, base_align, G_038000_TILE_MODE(word0)); |
1145 | return -EINVAL; | ||
1146 | } | ||
1147 | /* XXX check height align */ | ||
1148 | break; | ||
1149 | default: | ||
1150 | dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, | ||
1151 | G_038000_TILE_MODE(word0), word0); | ||
1152 | return -EINVAL; | 1372 | return -EINVAL; |
1153 | } | 1373 | } |
1154 | /* XXX check offset align */ | 1374 | |
1375 | word2 = radeon_get_ib_value(p, idx + 2) << 8; | ||
1376 | word3 = radeon_get_ib_value(p, idx + 3) << 8; | ||
1155 | 1377 | ||
1156 | word0 = radeon_get_ib_value(p, idx + 4); | 1378 | word0 = radeon_get_ib_value(p, idx + 4); |
1157 | word1 = radeon_get_ib_value(p, idx + 5); | 1379 | word1 = radeon_get_ib_value(p, idx + 5); |
1158 | blevel = G_038010_BASE_LEVEL(word0); | 1380 | blevel = G_038010_BASE_LEVEL(word0); |
1159 | nlevels = G_038014_LAST_LEVEL(word1); | 1381 | llevel = G_038014_LAST_LEVEL(word1); |
1160 | r600_texture_size(nfaces, blevel, nlevels, w0, h0, d0, bpe, | 1382 | if (array == 1) { |
1161 | (pitch_align * bpe), | 1383 | barray = G_038014_BASE_ARRAY(word1); |
1384 | larray = G_038014_LAST_ARRAY(word1); | ||
1385 | |||
1386 | nfaces = larray - barray + 1; | ||
1387 | } | ||
1388 | r600_texture_size(nfaces, blevel, llevel, w0, h0, d0, format, | ||
1389 | pitch_align, height_align, base_align, | ||
1162 | &l0_size, &mipmap_size); | 1390 | &l0_size, &mipmap_size); |
1163 | /* using get ib will give us the offset into the texture bo */ | 1391 | /* using get ib will give us the offset into the texture bo */ |
1164 | word0 = radeon_get_ib_value(p, idx + 2) << 8; | 1392 | if ((l0_size + word2) > radeon_bo_size(texture)) { |
1165 | if ((l0_size + word0) > radeon_bo_size(texture)) { | ||
1166 | dev_warn(p->dev, "texture bo too small (%d %d %d %d -> %d have %ld)\n", | 1393 | dev_warn(p->dev, "texture bo too small (%d %d %d %d -> %d have %ld)\n", |
1167 | w0, h0, bpe, word0, l0_size, radeon_bo_size(texture)); | 1394 | w0, h0, format, word2, l0_size, radeon_bo_size(texture)); |
1395 | dev_warn(p->dev, "alignments %d %d %d %lld\n", pitch, pitch_align, height_align, base_align); | ||
1168 | return -EINVAL; | 1396 | return -EINVAL; |
1169 | } | 1397 | } |
1170 | /* using get ib will give us the offset into the mipmap bo */ | 1398 | /* using get ib will give us the offset into the mipmap bo */ |
1171 | word0 = radeon_get_ib_value(p, idx + 3) << 8; | 1399 | word3 = radeon_get_ib_value(p, idx + 3) << 8; |
1172 | if ((mipmap_size + word0) > radeon_bo_size(mipmap)) { | 1400 | if ((mipmap_size + word3) > radeon_bo_size(mipmap)) { |
1173 | /*dev_warn(p->dev, "mipmap bo too small (%d %d %d %d %d %d -> %d have %ld)\n", | 1401 | /*dev_warn(p->dev, "mipmap bo too small (%d %d %d %d %d %d -> %d have %ld)\n", |
1174 | w0, h0, bpe, blevel, nlevels, word0, mipmap_size, radeon_bo_size(texture));*/ | 1402 | w0, h0, format, blevel, nlevels, word3, mipmap_size, radeon_bo_size(texture));*/ |
1175 | } | 1403 | } |
1176 | return 0; | 1404 | return 0; |
1177 | } | 1405 | } |
@@ -1194,6 +1422,38 @@ static int r600_packet3_check(struct radeon_cs_parser *p, | |||
1194 | idx_value = radeon_get_ib_value(p, idx); | 1422 | idx_value = radeon_get_ib_value(p, idx); |
1195 | 1423 | ||
1196 | switch (pkt->opcode) { | 1424 | switch (pkt->opcode) { |
1425 | case PACKET3_SET_PREDICATION: | ||
1426 | { | ||
1427 | int pred_op; | ||
1428 | int tmp; | ||
1429 | if (pkt->count != 1) { | ||
1430 | DRM_ERROR("bad SET PREDICATION\n"); | ||
1431 | return -EINVAL; | ||
1432 | } | ||
1433 | |||
1434 | tmp = radeon_get_ib_value(p, idx + 1); | ||
1435 | pred_op = (tmp >> 16) & 0x7; | ||
1436 | |||
1437 | /* for the clear predicate operation */ | ||
1438 | if (pred_op == 0) | ||
1439 | return 0; | ||
1440 | |||
1441 | if (pred_op > 2) { | ||
1442 | DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op); | ||
1443 | return -EINVAL; | ||
1444 | } | ||
1445 | |||
1446 | r = r600_cs_packet_next_reloc(p, &reloc); | ||
1447 | if (r) { | ||
1448 | DRM_ERROR("bad SET PREDICATION\n"); | ||
1449 | return -EINVAL; | ||
1450 | } | ||
1451 | |||
1452 | ib[idx + 0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); | ||
1453 | ib[idx + 1] = tmp + (upper_32_bits(reloc->lobj.gpu_offset) & 0xff); | ||
1454 | } | ||
1455 | break; | ||
1456 | |||
1197 | case PACKET3_START_3D_CMDBUF: | 1457 | case PACKET3_START_3D_CMDBUF: |
1198 | if (p->family >= CHIP_RV770 || pkt->count) { | 1458 | if (p->family >= CHIP_RV770 || pkt->count) { |
1199 | DRM_ERROR("bad START_3D\n"); | 1459 | DRM_ERROR("bad START_3D\n"); |
@@ -1386,7 +1646,10 @@ static int r600_packet3_check(struct radeon_cs_parser *p, | |||
1386 | mip_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); | 1646 | mip_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); |
1387 | mipmap = reloc->robj; | 1647 | mipmap = reloc->robj; |
1388 | r = r600_check_texture_resource(p, idx+(i*7)+1, | 1648 | r = r600_check_texture_resource(p, idx+(i*7)+1, |
1389 | texture, mipmap, reloc->lobj.tiling_flags); | 1649 | texture, mipmap, |
1650 | base_offset + radeon_get_ib_value(p, idx+1+(i*7)+2), | ||
1651 | mip_offset + radeon_get_ib_value(p, idx+1+(i*7)+3), | ||
1652 | reloc->lobj.tiling_flags); | ||
1390 | if (r) | 1653 | if (r) |
1391 | return r; | 1654 | return r; |
1392 | ib[idx+1+(i*7)+2] += base_offset; | 1655 | ib[idx+1+(i*7)+2] += base_offset; |