diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/r600_blit.c')
-rw-r--r-- | drivers/gpu/drm/radeon/r600_blit.c | 874 |
1 files changed, 0 insertions, 874 deletions
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c deleted file mode 100644 index daf7572be976..000000000000 --- a/drivers/gpu/drm/radeon/r600_blit.c +++ /dev/null | |||
@@ -1,874 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright 2009 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice (including the next | ||
12 | * paragraph) shall be included in all copies or substantial portions of the | ||
13 | * Software. | ||
14 | * | ||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
18 | * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
21 | * DEALINGS IN THE SOFTWARE. | ||
22 | * | ||
23 | * Authors: | ||
24 | * Alex Deucher <alexander.deucher@amd.com> | ||
25 | * | ||
26 | * ------------------------ This file is DEPRECATED! ------------------------- | ||
27 | */ | ||
28 | #include <drm/drmP.h> | ||
29 | #include <drm/radeon_drm.h> | ||
30 | #include "radeon_drv.h" | ||
31 | |||
32 | #include "r600_blit_shaders.h" | ||
33 | |||
34 | /* 23 bits of float fractional data */ | ||
35 | #define I2F_FRAC_BITS 23 | ||
36 | #define I2F_MASK ((1 << I2F_FRAC_BITS) - 1) | ||
37 | |||
38 | /* | ||
39 | * Converts unsigned integer into 32-bit IEEE floating point representation. | ||
40 | * Will be exact from 0 to 2^24. Above that, we round towards zero | ||
41 | * as the fractional bits will not fit in a float. (It would be better to | ||
42 | * round towards even as the fpu does, but that is slower.) | ||
43 | */ | ||
44 | static __pure uint32_t int2float(uint32_t x) | ||
45 | { | ||
46 | uint32_t msb, exponent, fraction; | ||
47 | |||
48 | /* Zero is special */ | ||
49 | if (!x) return 0; | ||
50 | |||
51 | /* Get location of the most significant bit */ | ||
52 | msb = __fls(x); | ||
53 | |||
54 | /* | ||
55 | * Use a rotate instead of a shift because that works both leftwards | ||
56 | * and rightwards due to the mod(32) behaviour. This means we don't | ||
57 | * need to check to see if we are above 2^24 or not. | ||
58 | */ | ||
59 | fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK; | ||
60 | exponent = (127 + msb) << I2F_FRAC_BITS; | ||
61 | |||
62 | return fraction + exponent; | ||
63 | } | ||
64 | |||
/* Draw-initiator values emitted by draw_auto(). */
#define DI_PT_RECTLIST		0x11
#define DI_INDEX_SIZE_16_BIT	0x0
#define DI_SRC_SEL_AUTO_INDEX	0x2

/* Texture (source) formats handed to set_tex_resource(). */
#define FMT_8			0x1
#define FMT_5_6_5		0x8
#define FMT_8_8_8_8		0x1a

/* Colour-buffer (destination) formats handed to set_render_target(). */
#define COLOR_8			0x1
#define COLOR_5_6_5		0x8
#define COLOR_8_8_8_8		0x1a
75 | |||
76 | static void | ||
77 | set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr) | ||
78 | { | ||
79 | u32 cb_color_info; | ||
80 | int pitch, slice; | ||
81 | RING_LOCALS; | ||
82 | DRM_DEBUG("\n"); | ||
83 | |||
84 | h = ALIGN(h, 8); | ||
85 | if (h < 8) | ||
86 | h = 8; | ||
87 | |||
88 | cb_color_info = ((format << 2) | (1 << 27)); | ||
89 | pitch = (w / 8) - 1; | ||
90 | slice = ((w * h) / 64) - 1; | ||
91 | |||
92 | if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) && | ||
93 | ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) { | ||
94 | BEGIN_RING(21 + 2); | ||
95 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
96 | OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
97 | OUT_RING(gpu_addr >> 8); | ||
98 | OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0)); | ||
99 | OUT_RING(2 << 0); | ||
100 | } else { | ||
101 | BEGIN_RING(21); | ||
102 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
103 | OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
104 | OUT_RING(gpu_addr >> 8); | ||
105 | } | ||
106 | |||
107 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
108 | OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
109 | OUT_RING((pitch << 0) | (slice << 10)); | ||
110 | |||
111 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
112 | OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
113 | OUT_RING(0); | ||
114 | |||
115 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
116 | OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
117 | OUT_RING(cb_color_info); | ||
118 | |||
119 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
120 | OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
121 | OUT_RING(0); | ||
122 | |||
123 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
124 | OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
125 | OUT_RING(0); | ||
126 | |||
127 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
128 | OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
129 | OUT_RING(0); | ||
130 | |||
131 | ADVANCE_RING(); | ||
132 | } | ||
133 | |||
134 | static void | ||
135 | cp_set_surface_sync(drm_radeon_private_t *dev_priv, | ||
136 | u32 sync_type, u32 size, u64 mc_addr) | ||
137 | { | ||
138 | u32 cp_coher_size; | ||
139 | RING_LOCALS; | ||
140 | DRM_DEBUG("\n"); | ||
141 | |||
142 | if (size == 0xffffffff) | ||
143 | cp_coher_size = 0xffffffff; | ||
144 | else | ||
145 | cp_coher_size = ((size + 255) >> 8); | ||
146 | |||
147 | BEGIN_RING(5); | ||
148 | OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3)); | ||
149 | OUT_RING(sync_type); | ||
150 | OUT_RING(cp_coher_size); | ||
151 | OUT_RING((mc_addr >> 8)); | ||
152 | OUT_RING(10); /* poll interval */ | ||
153 | ADVANCE_RING(); | ||
154 | } | ||
155 | |||
156 | static void | ||
157 | set_shaders(struct drm_device *dev) | ||
158 | { | ||
159 | drm_radeon_private_t *dev_priv = dev->dev_private; | ||
160 | u64 gpu_addr; | ||
161 | int i; | ||
162 | u32 *vs, *ps; | ||
163 | uint32_t sq_pgm_resources; | ||
164 | RING_LOCALS; | ||
165 | DRM_DEBUG("\n"); | ||
166 | |||
167 | /* load shaders */ | ||
168 | vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset); | ||
169 | ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256); | ||
170 | |||
171 | for (i = 0; i < r6xx_vs_size; i++) | ||
172 | vs[i] = cpu_to_le32(r6xx_vs[i]); | ||
173 | for (i = 0; i < r6xx_ps_size; i++) | ||
174 | ps[i] = cpu_to_le32(r6xx_ps[i]); | ||
175 | |||
176 | dev_priv->blit_vb->used = 512; | ||
177 | |||
178 | gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset; | ||
179 | |||
180 | /* setup shader regs */ | ||
181 | sq_pgm_resources = (1 << 0); | ||
182 | |||
183 | BEGIN_RING(9 + 12); | ||
184 | /* VS */ | ||
185 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
186 | OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
187 | OUT_RING(gpu_addr >> 8); | ||
188 | |||
189 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
190 | OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
191 | OUT_RING(sq_pgm_resources); | ||
192 | |||
193 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
194 | OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
195 | OUT_RING(0); | ||
196 | |||
197 | /* PS */ | ||
198 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
199 | OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
200 | OUT_RING((gpu_addr + 256) >> 8); | ||
201 | |||
202 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
203 | OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
204 | OUT_RING(sq_pgm_resources | (1 << 28)); | ||
205 | |||
206 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
207 | OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
208 | OUT_RING(2); | ||
209 | |||
210 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); | ||
211 | OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
212 | OUT_RING(0); | ||
213 | ADVANCE_RING(); | ||
214 | |||
215 | cp_set_surface_sync(dev_priv, | ||
216 | R600_SH_ACTION_ENA, 512, gpu_addr); | ||
217 | } | ||
218 | |||
219 | static void | ||
220 | set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr) | ||
221 | { | ||
222 | uint32_t sq_vtx_constant_word2; | ||
223 | RING_LOCALS; | ||
224 | DRM_DEBUG("\n"); | ||
225 | |||
226 | sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8)); | ||
227 | #ifdef __BIG_ENDIAN | ||
228 | sq_vtx_constant_word2 |= (2 << 30); | ||
229 | #endif | ||
230 | |||
231 | BEGIN_RING(9); | ||
232 | OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); | ||
233 | OUT_RING(0x460); | ||
234 | OUT_RING(gpu_addr & 0xffffffff); | ||
235 | OUT_RING(48 - 1); | ||
236 | OUT_RING(sq_vtx_constant_word2); | ||
237 | OUT_RING(1 << 0); | ||
238 | OUT_RING(0); | ||
239 | OUT_RING(0); | ||
240 | OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30); | ||
241 | ADVANCE_RING(); | ||
242 | |||
243 | if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || | ||
244 | ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || | ||
245 | ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || | ||
246 | ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) || | ||
247 | ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) | ||
248 | cp_set_surface_sync(dev_priv, | ||
249 | R600_TC_ACTION_ENA, 48, gpu_addr); | ||
250 | else | ||
251 | cp_set_surface_sync(dev_priv, | ||
252 | R600_VC_ACTION_ENA, 48, gpu_addr); | ||
253 | } | ||
254 | |||
255 | static void | ||
256 | set_tex_resource(drm_radeon_private_t *dev_priv, | ||
257 | int format, int w, int h, int pitch, u64 gpu_addr) | ||
258 | { | ||
259 | uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; | ||
260 | RING_LOCALS; | ||
261 | DRM_DEBUG("\n"); | ||
262 | |||
263 | if (h < 1) | ||
264 | h = 1; | ||
265 | |||
266 | sq_tex_resource_word0 = (1 << 0); | ||
267 | sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) | | ||
268 | ((w - 1) << 19)); | ||
269 | |||
270 | sq_tex_resource_word1 = (format << 26); | ||
271 | sq_tex_resource_word1 |= ((h - 1) << 0); | ||
272 | |||
273 | sq_tex_resource_word4 = ((1 << 14) | | ||
274 | (0 << 16) | | ||
275 | (1 << 19) | | ||
276 | (2 << 22) | | ||
277 | (3 << 25)); | ||
278 | |||
279 | BEGIN_RING(9); | ||
280 | OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); | ||
281 | OUT_RING(0); | ||
282 | OUT_RING(sq_tex_resource_word0); | ||
283 | OUT_RING(sq_tex_resource_word1); | ||
284 | OUT_RING(gpu_addr >> 8); | ||
285 | OUT_RING(gpu_addr >> 8); | ||
286 | OUT_RING(sq_tex_resource_word4); | ||
287 | OUT_RING(0); | ||
288 | OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30); | ||
289 | ADVANCE_RING(); | ||
290 | |||
291 | } | ||
292 | |||
293 | static void | ||
294 | set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2) | ||
295 | { | ||
296 | RING_LOCALS; | ||
297 | DRM_DEBUG("\n"); | ||
298 | |||
299 | BEGIN_RING(12); | ||
300 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); | ||
301 | OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
302 | OUT_RING((x1 << 0) | (y1 << 16)); | ||
303 | OUT_RING((x2 << 0) | (y2 << 16)); | ||
304 | |||
305 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); | ||
306 | OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
307 | OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31)); | ||
308 | OUT_RING((x2 << 0) | (y2 << 16)); | ||
309 | |||
310 | OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); | ||
311 | OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); | ||
312 | OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31)); | ||
313 | OUT_RING((x2 << 0) | (y2 << 16)); | ||
314 | ADVANCE_RING(); | ||
315 | } | ||
316 | |||
317 | static void | ||
318 | draw_auto(drm_radeon_private_t *dev_priv) | ||
319 | { | ||
320 | RING_LOCALS; | ||
321 | DRM_DEBUG("\n"); | ||
322 | |||
323 | BEGIN_RING(10); | ||
324 | OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); | ||
325 | OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2); | ||
326 | OUT_RING(DI_PT_RECTLIST); | ||
327 | |||
328 | OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); | ||
329 | #ifdef __BIG_ENDIAN | ||
330 | OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT); | ||
331 | #else | ||
332 | OUT_RING(DI_INDEX_SIZE_16_BIT); | ||
333 | #endif | ||
334 | |||
335 | OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); | ||
336 | OUT_RING(1); | ||
337 | |||
338 | OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); | ||
339 | OUT_RING(3); | ||
340 | OUT_RING(DI_SRC_SEL_AUTO_INDEX); | ||
341 | |||
342 | ADVANCE_RING(); | ||
343 | COMMIT_RING(); | ||
344 | } | ||
345 | |||
346 | static void | ||
347 | set_default_state(drm_radeon_private_t *dev_priv) | ||
348 | { | ||
349 | int i; | ||
350 | u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; | ||
351 | u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; | ||
352 | int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; | ||
353 | int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; | ||
354 | int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; | ||
355 | RING_LOCALS; | ||
356 | |||
357 | switch ((dev_priv->flags & RADEON_FAMILY_MASK)) { | ||
358 | case CHIP_R600: | ||
359 | num_ps_gprs = 192; | ||
360 | num_vs_gprs = 56; | ||
361 | num_temp_gprs = 4; | ||
362 | num_gs_gprs = 0; | ||
363 | num_es_gprs = 0; | ||
364 | num_ps_threads = 136; | ||
365 | num_vs_threads = 48; | ||
366 | num_gs_threads = 4; | ||
367 | num_es_threads = 4; | ||
368 | num_ps_stack_entries = 128; | ||
369 | num_vs_stack_entries = 128; | ||
370 | num_gs_stack_entries = 0; | ||
371 | num_es_stack_entries = 0; | ||
372 | break; | ||
373 | case CHIP_RV630: | ||
374 | case CHIP_RV635: | ||
375 | num_ps_gprs = 84; | ||
376 | num_vs_gprs = 36; | ||
377 | num_temp_gprs = 4; | ||
378 | num_gs_gprs = 0; | ||
379 | num_es_gprs = 0; | ||
380 | num_ps_threads = 144; | ||
381 | num_vs_threads = 40; | ||
382 | num_gs_threads = 4; | ||
383 | num_es_threads = 4; | ||
384 | num_ps_stack_entries = 40; | ||
385 | num_vs_stack_entries = 40; | ||
386 | num_gs_stack_entries = 32; | ||
387 | num_es_stack_entries = 16; | ||
388 | break; | ||
389 | case CHIP_RV610: | ||
390 | case CHIP_RV620: | ||
391 | case CHIP_RS780: | ||
392 | case CHIP_RS880: | ||
393 | default: | ||
394 | num_ps_gprs = 84; | ||
395 | num_vs_gprs = 36; | ||
396 | num_temp_gprs = 4; | ||
397 | num_gs_gprs = 0; | ||
398 | num_es_gprs = 0; | ||
399 | num_ps_threads = 136; | ||
400 | num_vs_threads = 48; | ||
401 | num_gs_threads = 4; | ||
402 | num_es_threads = 4; | ||
403 | num_ps_stack_entries = 40; | ||
404 | num_vs_stack_entries = 40; | ||
405 | num_gs_stack_entries = 32; | ||
406 | num_es_stack_entries = 16; | ||
407 | break; | ||
408 | case CHIP_RV670: | ||
409 | num_ps_gprs = 144; | ||
410 | num_vs_gprs = 40; | ||
411 | num_temp_gprs = 4; | ||
412 | num_gs_gprs = 0; | ||
413 | num_es_gprs = 0; | ||
414 | num_ps_threads = 136; | ||
415 | num_vs_threads = 48; | ||
416 | num_gs_threads = 4; | ||
417 | num_es_threads = 4; | ||
418 | num_ps_stack_entries = 40; | ||
419 | num_vs_stack_entries = 40; | ||
420 | num_gs_stack_entries = 32; | ||
421 | num_es_stack_entries = 16; | ||
422 | break; | ||
423 | case CHIP_RV770: | ||
424 | num_ps_gprs = 192; | ||
425 | num_vs_gprs = 56; | ||
426 | num_temp_gprs = 4; | ||
427 | num_gs_gprs = 0; | ||
428 | num_es_gprs = 0; | ||
429 | num_ps_threads = 188; | ||
430 | num_vs_threads = 60; | ||
431 | num_gs_threads = 0; | ||
432 | num_es_threads = 0; | ||
433 | num_ps_stack_entries = 256; | ||
434 | num_vs_stack_entries = 256; | ||
435 | num_gs_stack_entries = 0; | ||
436 | num_es_stack_entries = 0; | ||
437 | break; | ||
438 | case CHIP_RV730: | ||
439 | case CHIP_RV740: | ||
440 | num_ps_gprs = 84; | ||
441 | num_vs_gprs = 36; | ||
442 | num_temp_gprs = 4; | ||
443 | num_gs_gprs = 0; | ||
444 | num_es_gprs = 0; | ||
445 | num_ps_threads = 188; | ||
446 | num_vs_threads = 60; | ||
447 | num_gs_threads = 0; | ||
448 | num_es_threads = 0; | ||
449 | num_ps_stack_entries = 128; | ||
450 | num_vs_stack_entries = 128; | ||
451 | num_gs_stack_entries = 0; | ||
452 | num_es_stack_entries = 0; | ||
453 | break; | ||
454 | case CHIP_RV710: | ||
455 | num_ps_gprs = 192; | ||
456 | num_vs_gprs = 56; | ||
457 | num_temp_gprs = 4; | ||
458 | num_gs_gprs = 0; | ||
459 | num_es_gprs = 0; | ||
460 | num_ps_threads = 144; | ||
461 | num_vs_threads = 48; | ||
462 | num_gs_threads = 0; | ||
463 | num_es_threads = 0; | ||
464 | num_ps_stack_entries = 128; | ||
465 | num_vs_stack_entries = 128; | ||
466 | num_gs_stack_entries = 0; | ||
467 | num_es_stack_entries = 0; | ||
468 | break; | ||
469 | } | ||
470 | |||
471 | if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || | ||
472 | ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || | ||
473 | ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || | ||
474 | ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) || | ||
475 | ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) | ||
476 | sq_config = 0; | ||
477 | else | ||
478 | sq_config = R600_VC_ENABLE; | ||
479 | |||
480 | sq_config |= (R600_DX9_CONSTS | | ||
481 | R600_ALU_INST_PREFER_VECTOR | | ||
482 | R600_PS_PRIO(0) | | ||
483 | R600_VS_PRIO(1) | | ||
484 | R600_GS_PRIO(2) | | ||
485 | R600_ES_PRIO(3)); | ||
486 | |||
487 | sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) | | ||
488 | R600_NUM_VS_GPRS(num_vs_gprs) | | ||
489 | R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); | ||
490 | sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) | | ||
491 | R600_NUM_ES_GPRS(num_es_gprs)); | ||
492 | sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) | | ||
493 | R600_NUM_VS_THREADS(num_vs_threads) | | ||
494 | R600_NUM_GS_THREADS(num_gs_threads) | | ||
495 | R600_NUM_ES_THREADS(num_es_threads)); | ||
496 | sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | | ||
497 | R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); | ||
498 | sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | | ||
499 | R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries)); | ||
500 | |||
501 | if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) { | ||
502 | BEGIN_RING(r7xx_default_size + 10); | ||
503 | for (i = 0; i < r7xx_default_size; i++) | ||
504 | OUT_RING(r7xx_default_state[i]); | ||
505 | } else { | ||
506 | BEGIN_RING(r6xx_default_size + 10); | ||
507 | for (i = 0; i < r6xx_default_size; i++) | ||
508 | OUT_RING(r6xx_default_state[i]); | ||
509 | } | ||
510 | OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); | ||
511 | OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); | ||
512 | /* SQ config */ | ||
513 | OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6)); | ||
514 | OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2); | ||
515 | OUT_RING(sq_config); | ||
516 | OUT_RING(sq_gpr_resource_mgmt_1); | ||
517 | OUT_RING(sq_gpr_resource_mgmt_2); | ||
518 | OUT_RING(sq_thread_resource_mgmt); | ||
519 | OUT_RING(sq_stack_resource_mgmt_1); | ||
520 | OUT_RING(sq_stack_resource_mgmt_2); | ||
521 | ADVANCE_RING(); | ||
522 | } | ||
523 | |||
524 | static int r600_nomm_get_vb(struct drm_device *dev) | ||
525 | { | ||
526 | drm_radeon_private_t *dev_priv = dev->dev_private; | ||
527 | dev_priv->blit_vb = radeon_freelist_get(dev); | ||
528 | if (!dev_priv->blit_vb) { | ||
529 | DRM_ERROR("Unable to allocate vertex buffer for blit\n"); | ||
530 | return -EAGAIN; | ||
531 | } | ||
532 | return 0; | ||
533 | } | ||
534 | |||
535 | static void r600_nomm_put_vb(struct drm_device *dev) | ||
536 | { | ||
537 | drm_radeon_private_t *dev_priv = dev->dev_private; | ||
538 | |||
539 | dev_priv->blit_vb->used = 0; | ||
540 | radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb); | ||
541 | } | ||
542 | |||
543 | static void *r600_nomm_get_vb_ptr(struct drm_device *dev) | ||
544 | { | ||
545 | drm_radeon_private_t *dev_priv = dev->dev_private; | ||
546 | return (((char *)dev->agp_buffer_map->handle + | ||
547 | dev_priv->blit_vb->offset + dev_priv->blit_vb->used)); | ||
548 | } | ||
549 | |||
550 | int | ||
551 | r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv) | ||
552 | { | ||
553 | drm_radeon_private_t *dev_priv = dev->dev_private; | ||
554 | int ret; | ||
555 | DRM_DEBUG("\n"); | ||
556 | |||
557 | ret = r600_nomm_get_vb(dev); | ||
558 | if (ret) | ||
559 | return ret; | ||
560 | |||
561 | dev_priv->blit_vb->file_priv = file_priv; | ||
562 | |||
563 | set_default_state(dev_priv); | ||
564 | set_shaders(dev); | ||
565 | |||
566 | return 0; | ||
567 | } | ||
568 | |||
569 | |||
570 | void | ||
571 | r600_done_blit_copy(struct drm_device *dev) | ||
572 | { | ||
573 | drm_radeon_private_t *dev_priv = dev->dev_private; | ||
574 | RING_LOCALS; | ||
575 | DRM_DEBUG("\n"); | ||
576 | |||
577 | BEGIN_RING(5); | ||
578 | OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); | ||
579 | OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); | ||
580 | /* wait for 3D idle clean */ | ||
581 | OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); | ||
582 | OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2); | ||
583 | OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN); | ||
584 | |||
585 | ADVANCE_RING(); | ||
586 | COMMIT_RING(); | ||
587 | |||
588 | r600_nomm_put_vb(dev); | ||
589 | } | ||
590 | |||
591 | void | ||
592 | r600_blit_copy(struct drm_device *dev, | ||
593 | uint64_t src_gpu_addr, uint64_t dst_gpu_addr, | ||
594 | int size_bytes) | ||
595 | { | ||
596 | drm_radeon_private_t *dev_priv = dev->dev_private; | ||
597 | int max_bytes; | ||
598 | u64 vb_addr; | ||
599 | u32 *vb; | ||
600 | |||
601 | vb = r600_nomm_get_vb_ptr(dev); | ||
602 | |||
603 | if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { | ||
604 | max_bytes = 8192; | ||
605 | |||
606 | while (size_bytes) { | ||
607 | int cur_size = size_bytes; | ||
608 | int src_x = src_gpu_addr & 255; | ||
609 | int dst_x = dst_gpu_addr & 255; | ||
610 | int h = 1; | ||
611 | src_gpu_addr = src_gpu_addr & ~255; | ||
612 | dst_gpu_addr = dst_gpu_addr & ~255; | ||
613 | |||
614 | if (!src_x && !dst_x) { | ||
615 | h = (cur_size / max_bytes); | ||
616 | if (h > 8192) | ||
617 | h = 8192; | ||
618 | if (h == 0) | ||
619 | h = 1; | ||
620 | else | ||
621 | cur_size = max_bytes; | ||
622 | } else { | ||
623 | if (cur_size > max_bytes) | ||
624 | cur_size = max_bytes; | ||
625 | if (cur_size > (max_bytes - dst_x)) | ||
626 | cur_size = (max_bytes - dst_x); | ||
627 | if (cur_size > (max_bytes - src_x)) | ||
628 | cur_size = (max_bytes - src_x); | ||
629 | } | ||
630 | |||
631 | if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { | ||
632 | |||
633 | r600_nomm_put_vb(dev); | ||
634 | r600_nomm_get_vb(dev); | ||
635 | if (!dev_priv->blit_vb) | ||
636 | return; | ||
637 | set_shaders(dev); | ||
638 | vb = r600_nomm_get_vb_ptr(dev); | ||
639 | } | ||
640 | |||
641 | vb[0] = int2float(dst_x); | ||
642 | vb[1] = 0; | ||
643 | vb[2] = int2float(src_x); | ||
644 | vb[3] = 0; | ||
645 | |||
646 | vb[4] = int2float(dst_x); | ||
647 | vb[5] = int2float(h); | ||
648 | vb[6] = int2float(src_x); | ||
649 | vb[7] = int2float(h); | ||
650 | |||
651 | vb[8] = int2float(dst_x + cur_size); | ||
652 | vb[9] = int2float(h); | ||
653 | vb[10] = int2float(src_x + cur_size); | ||
654 | vb[11] = int2float(h); | ||
655 | |||
656 | /* src */ | ||
657 | set_tex_resource(dev_priv, FMT_8, | ||
658 | src_x + cur_size, h, src_x + cur_size, | ||
659 | src_gpu_addr); | ||
660 | |||
661 | cp_set_surface_sync(dev_priv, | ||
662 | R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); | ||
663 | |||
664 | /* dst */ | ||
665 | set_render_target(dev_priv, COLOR_8, | ||
666 | dst_x + cur_size, h, | ||
667 | dst_gpu_addr); | ||
668 | |||
669 | /* scissors */ | ||
670 | set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h); | ||
671 | |||
672 | /* Vertex buffer setup */ | ||
673 | vb_addr = dev_priv->gart_buffers_offset + | ||
674 | dev_priv->blit_vb->offset + | ||
675 | dev_priv->blit_vb->used; | ||
676 | set_vtx_resource(dev_priv, vb_addr); | ||
677 | |||
678 | /* draw */ | ||
679 | draw_auto(dev_priv); | ||
680 | |||
681 | cp_set_surface_sync(dev_priv, | ||
682 | R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, | ||
683 | cur_size * h, dst_gpu_addr); | ||
684 | |||
685 | vb += 12; | ||
686 | dev_priv->blit_vb->used += 12 * 4; | ||
687 | |||
688 | src_gpu_addr += cur_size * h; | ||
689 | dst_gpu_addr += cur_size * h; | ||
690 | size_bytes -= cur_size * h; | ||
691 | } | ||
692 | } else { | ||
693 | max_bytes = 8192 * 4; | ||
694 | |||
695 | while (size_bytes) { | ||
696 | int cur_size = size_bytes; | ||
697 | int src_x = (src_gpu_addr & 255); | ||
698 | int dst_x = (dst_gpu_addr & 255); | ||
699 | int h = 1; | ||
700 | src_gpu_addr = src_gpu_addr & ~255; | ||
701 | dst_gpu_addr = dst_gpu_addr & ~255; | ||
702 | |||
703 | if (!src_x && !dst_x) { | ||
704 | h = (cur_size / max_bytes); | ||
705 | if (h > 8192) | ||
706 | h = 8192; | ||
707 | if (h == 0) | ||
708 | h = 1; | ||
709 | else | ||
710 | cur_size = max_bytes; | ||
711 | } else { | ||
712 | if (cur_size > max_bytes) | ||
713 | cur_size = max_bytes; | ||
714 | if (cur_size > (max_bytes - dst_x)) | ||
715 | cur_size = (max_bytes - dst_x); | ||
716 | if (cur_size > (max_bytes - src_x)) | ||
717 | cur_size = (max_bytes - src_x); | ||
718 | } | ||
719 | |||
720 | if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { | ||
721 | r600_nomm_put_vb(dev); | ||
722 | r600_nomm_get_vb(dev); | ||
723 | if (!dev_priv->blit_vb) | ||
724 | return; | ||
725 | |||
726 | set_shaders(dev); | ||
727 | vb = r600_nomm_get_vb_ptr(dev); | ||
728 | } | ||
729 | |||
730 | vb[0] = int2float(dst_x / 4); | ||
731 | vb[1] = 0; | ||
732 | vb[2] = int2float(src_x / 4); | ||
733 | vb[3] = 0; | ||
734 | |||
735 | vb[4] = int2float(dst_x / 4); | ||
736 | vb[5] = int2float(h); | ||
737 | vb[6] = int2float(src_x / 4); | ||
738 | vb[7] = int2float(h); | ||
739 | |||
740 | vb[8] = int2float((dst_x + cur_size) / 4); | ||
741 | vb[9] = int2float(h); | ||
742 | vb[10] = int2float((src_x + cur_size) / 4); | ||
743 | vb[11] = int2float(h); | ||
744 | |||
745 | /* src */ | ||
746 | set_tex_resource(dev_priv, FMT_8_8_8_8, | ||
747 | (src_x + cur_size) / 4, | ||
748 | h, (src_x + cur_size) / 4, | ||
749 | src_gpu_addr); | ||
750 | |||
751 | cp_set_surface_sync(dev_priv, | ||
752 | R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); | ||
753 | |||
754 | /* dst */ | ||
755 | set_render_target(dev_priv, COLOR_8_8_8_8, | ||
756 | (dst_x + cur_size) / 4, h, | ||
757 | dst_gpu_addr); | ||
758 | |||
759 | /* scissors */ | ||
760 | set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h); | ||
761 | |||
762 | /* Vertex buffer setup */ | ||
763 | vb_addr = dev_priv->gart_buffers_offset + | ||
764 | dev_priv->blit_vb->offset + | ||
765 | dev_priv->blit_vb->used; | ||
766 | set_vtx_resource(dev_priv, vb_addr); | ||
767 | |||
768 | /* draw */ | ||
769 | draw_auto(dev_priv); | ||
770 | |||
771 | cp_set_surface_sync(dev_priv, | ||
772 | R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, | ||
773 | cur_size * h, dst_gpu_addr); | ||
774 | |||
775 | vb += 12; | ||
776 | dev_priv->blit_vb->used += 12 * 4; | ||
777 | |||
778 | src_gpu_addr += cur_size * h; | ||
779 | dst_gpu_addr += cur_size * h; | ||
780 | size_bytes -= cur_size * h; | ||
781 | } | ||
782 | } | ||
783 | } | ||
784 | |||
785 | void | ||
786 | r600_blit_swap(struct drm_device *dev, | ||
787 | uint64_t src_gpu_addr, uint64_t dst_gpu_addr, | ||
788 | int sx, int sy, int dx, int dy, | ||
789 | int w, int h, int src_pitch, int dst_pitch, int cpp) | ||
790 | { | ||
791 | drm_radeon_private_t *dev_priv = dev->dev_private; | ||
792 | int cb_format, tex_format; | ||
793 | int sx2, sy2, dx2, dy2; | ||
794 | u64 vb_addr; | ||
795 | u32 *vb; | ||
796 | |||
797 | if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { | ||
798 | |||
799 | r600_nomm_put_vb(dev); | ||
800 | r600_nomm_get_vb(dev); | ||
801 | if (!dev_priv->blit_vb) | ||
802 | return; | ||
803 | |||
804 | set_shaders(dev); | ||
805 | } | ||
806 | vb = r600_nomm_get_vb_ptr(dev); | ||
807 | |||
808 | sx2 = sx + w; | ||
809 | sy2 = sy + h; | ||
810 | dx2 = dx + w; | ||
811 | dy2 = dy + h; | ||
812 | |||
813 | vb[0] = int2float(dx); | ||
814 | vb[1] = int2float(dy); | ||
815 | vb[2] = int2float(sx); | ||
816 | vb[3] = int2float(sy); | ||
817 | |||
818 | vb[4] = int2float(dx); | ||
819 | vb[5] = int2float(dy2); | ||
820 | vb[6] = int2float(sx); | ||
821 | vb[7] = int2float(sy2); | ||
822 | |||
823 | vb[8] = int2float(dx2); | ||
824 | vb[9] = int2float(dy2); | ||
825 | vb[10] = int2float(sx2); | ||
826 | vb[11] = int2float(sy2); | ||
827 | |||
828 | switch(cpp) { | ||
829 | case 4: | ||
830 | cb_format = COLOR_8_8_8_8; | ||
831 | tex_format = FMT_8_8_8_8; | ||
832 | break; | ||
833 | case 2: | ||
834 | cb_format = COLOR_5_6_5; | ||
835 | tex_format = FMT_5_6_5; | ||
836 | break; | ||
837 | default: | ||
838 | cb_format = COLOR_8; | ||
839 | tex_format = FMT_8; | ||
840 | break; | ||
841 | } | ||
842 | |||
843 | /* src */ | ||
844 | set_tex_resource(dev_priv, tex_format, | ||
845 | src_pitch / cpp, | ||
846 | sy2, src_pitch / cpp, | ||
847 | src_gpu_addr); | ||
848 | |||
849 | cp_set_surface_sync(dev_priv, | ||
850 | R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr); | ||
851 | |||
852 | /* dst */ | ||
853 | set_render_target(dev_priv, cb_format, | ||
854 | dst_pitch / cpp, dy2, | ||
855 | dst_gpu_addr); | ||
856 | |||
857 | /* scissors */ | ||
858 | set_scissors(dev_priv, dx, dy, dx2, dy2); | ||
859 | |||
860 | /* Vertex buffer setup */ | ||
861 | vb_addr = dev_priv->gart_buffers_offset + | ||
862 | dev_priv->blit_vb->offset + | ||
863 | dev_priv->blit_vb->used; | ||
864 | set_vtx_resource(dev_priv, vb_addr); | ||
865 | |||
866 | /* draw */ | ||
867 | draw_auto(dev_priv); | ||
868 | |||
869 | cp_set_surface_sync(dev_priv, | ||
870 | R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, | ||
871 | dst_pitch * dy2, dst_gpu_addr); | ||
872 | |||
873 | dev_priv->blit_vb->used += 12 * 4; | ||
874 | } | ||