aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon/r600_blit.c
diff options
context:
space:
mode:
author Jerome Glisse <jglisse@redhat.com> 2009-09-07 20:10:24 -0400
committer Dave Airlie <airlied@redhat.com> 2009-09-07 21:15:52 -0400
commit 3ce0a23d2d253185df24e22e3d5f89800bb3dd1c (patch)
tree 4b4defdbe33aec7317101cce0f89c33083f8d17b /drivers/gpu/drm/radeon/r600_blit.c
parent 4ce001abafafe77e5dd943d1480fc9f87894e96f (diff)
drm/radeon/kms: add r600 KMS support
This adds the r600 KMS + CS support to the Linux kernel. The r600 TTM support is quite basic and still needs more work esp around using interrupts, but the polled fencing should work okay for now. Also currently TTM is using memcpy to do VRAM moves, the code is here to use a 3D blit to do this, but isn't fully debugged yet. Authors: Alex Deucher <alexdeucher@gmail.com> Dave Airlie <airlied@redhat.com> Jerome Glisse <jglisse@redhat.com> Signed-off-by: Jerome Glisse <jglisse@redhat.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/r600_blit.c')
-rw-r--r--drivers/gpu/drm/radeon/r600_blit.c855
1 files changed, 855 insertions, 0 deletions
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
new file mode 100644
index 000000000000..c51402e92493
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -0,0 +1,855 @@
1/*
2 * Copyright 2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Alex Deucher <alexander.deucher@amd.com>
25 */
26#include "drmP.h"
27#include "drm.h"
28#include "radeon_drm.h"
29#include "radeon_drv.h"
30
31#include "r600_blit_shaders.h"
32
/*
 * Values emitted by draw_auto(): DI_PT_RECTLIST is written to
 * VGT_PRIMITIVE_TYPE, DI_INDEX_SIZE_16_BIT is the payload of the INDEX_TYPE
 * packet, and DI_SRC_SEL_AUTO_INDEX is the last dword of DRAW_INDEX_AUTO.
 */
33#define DI_PT_RECTLIST 0x11
34#define DI_INDEX_SIZE_16_BIT 0x0
35#define DI_SRC_SEL_AUTO_INDEX 0x2
36
/*
 * Surface format codes used by the blit paths in this file: FMT_* values are
 * passed as the 'format' argument of set_tex_resource() (source texture),
 * COLOR_* values as the 'format' argument of set_render_target()
 * (destination). Each pair covers 8-bit, 16-bit 5:6:5 and 32-bit 8:8:8:8.
 */
37#define FMT_8 0x1
38#define FMT_5_6_5 0x8
39#define FMT_8_8_8_8 0x1a
40#define COLOR_8 0x1
41#define COLOR_5_6_5 0x8
42#define COLOR_8_8_8_8 0x1a
43
44static inline void
45set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
46{
47 u32 cb_color_info;
48 int pitch, slice;
49 RING_LOCALS;
50 DRM_DEBUG("\n");
51
52 h = (h + 7) & ~7;
53 if (h < 8)
54 h = 8;
55
56 cb_color_info = ((format << 2) | (1 << 27));
57 pitch = (w / 8) - 1;
58 slice = ((w * h) / 64) - 1;
59
60 if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
61 ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
62 BEGIN_RING(21 + 2);
63 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
64 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
65 OUT_RING(gpu_addr >> 8);
66 OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
67 OUT_RING(2 << 0);
68 } else {
69 BEGIN_RING(21);
70 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
71 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
72 OUT_RING(gpu_addr >> 8);
73 }
74
75 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
76 OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
77 OUT_RING((pitch << 0) | (slice << 10));
78
79 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
80 OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
81 OUT_RING(0);
82
83 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
84 OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
85 OUT_RING(cb_color_info);
86
87 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
88 OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
89 OUT_RING(0);
90
91 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
92 OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
93 OUT_RING(0);
94
95 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
96 OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
97 OUT_RING(0);
98
99 ADVANCE_RING();
100}
101
102static inline void
103cp_set_surface_sync(drm_radeon_private_t *dev_priv,
104 u32 sync_type, u32 size, u64 mc_addr)
105{
106 u32 cp_coher_size;
107 RING_LOCALS;
108 DRM_DEBUG("\n");
109
110 if (size == 0xffffffff)
111 cp_coher_size = 0xffffffff;
112 else
113 cp_coher_size = ((size + 255) >> 8);
114
115 BEGIN_RING(5);
116 OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
117 OUT_RING(sync_type);
118 OUT_RING(cp_coher_size);
119 OUT_RING((mc_addr >> 8));
120 OUT_RING(10); /* poll interval */
121 ADVANCE_RING();
122}
123
124static inline void
125set_shaders(struct drm_device *dev)
126{
127 drm_radeon_private_t *dev_priv = dev->dev_private;
128 u64 gpu_addr;
129 int shader_size, i;
130 u32 *vs, *ps;
131 uint32_t sq_pgm_resources;
132 RING_LOCALS;
133 DRM_DEBUG("\n");
134
135 /* load shaders */
136 vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
137 ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
138
139 shader_size = r6xx_vs_size;
140 for (i = 0; i < shader_size; i++)
141 vs[i] = r6xx_vs[i];
142 shader_size = r6xx_ps_size;
143 for (i = 0; i < shader_size; i++)
144 ps[i] = r6xx_ps[i];
145
146 dev_priv->blit_vb->used = 512;
147
148 gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
149
150 /* setup shader regs */
151 sq_pgm_resources = (1 << 0);
152
153 BEGIN_RING(9 + 12);
154 /* VS */
155 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
156 OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
157 OUT_RING(gpu_addr >> 8);
158
159 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
160 OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
161 OUT_RING(sq_pgm_resources);
162
163 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
164 OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
165 OUT_RING(0);
166
167 /* PS */
168 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
169 OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
170 OUT_RING((gpu_addr + 256) >> 8);
171
172 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
173 OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
174 OUT_RING(sq_pgm_resources | (1 << 28));
175
176 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
177 OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
178 OUT_RING(2);
179
180 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
181 OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
182 OUT_RING(0);
183 ADVANCE_RING();
184
185 cp_set_surface_sync(dev_priv,
186 R600_SH_ACTION_ENA, 512, gpu_addr);
187}
188
189static inline void
190set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
191{
192 uint32_t sq_vtx_constant_word2;
193 RING_LOCALS;
194 DRM_DEBUG("\n");
195
196 sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
197
198 BEGIN_RING(9);
199 OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
200 OUT_RING(0x460);
201 OUT_RING(gpu_addr & 0xffffffff);
202 OUT_RING(48 - 1);
203 OUT_RING(sq_vtx_constant_word2);
204 OUT_RING(1 << 0);
205 OUT_RING(0);
206 OUT_RING(0);
207 OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
208 ADVANCE_RING();
209
210 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
211 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
212 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
213 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
214 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
215 cp_set_surface_sync(dev_priv,
216 R600_TC_ACTION_ENA, 48, gpu_addr);
217 else
218 cp_set_surface_sync(dev_priv,
219 R600_VC_ACTION_ENA, 48, gpu_addr);
220}
221
222static inline void
223set_tex_resource(drm_radeon_private_t *dev_priv,
224 int format, int w, int h, int pitch, u64 gpu_addr)
225{
226 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
227 RING_LOCALS;
228 DRM_DEBUG("\n");
229
230 if (h < 1)
231 h = 1;
232
233 sq_tex_resource_word0 = (1 << 0);
234 sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
235 ((w - 1) << 19));
236
237 sq_tex_resource_word1 = (format << 26);
238 sq_tex_resource_word1 |= ((h - 1) << 0);
239
240 sq_tex_resource_word4 = ((1 << 14) |
241 (0 << 16) |
242 (1 << 19) |
243 (2 << 22) |
244 (3 << 25));
245
246 BEGIN_RING(9);
247 OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
248 OUT_RING(0);
249 OUT_RING(sq_tex_resource_word0);
250 OUT_RING(sq_tex_resource_word1);
251 OUT_RING(gpu_addr >> 8);
252 OUT_RING(gpu_addr >> 8);
253 OUT_RING(sq_tex_resource_word4);
254 OUT_RING(0);
255 OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
256 ADVANCE_RING();
257
258}
259
260static inline void
261set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
262{
263 RING_LOCALS;
264 DRM_DEBUG("\n");
265
266 BEGIN_RING(12);
267 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
268 OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
269 OUT_RING((x1 << 0) | (y1 << 16));
270 OUT_RING((x2 << 0) | (y2 << 16));
271
272 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
273 OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
274 OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
275 OUT_RING((x2 << 0) | (y2 << 16));
276
277 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
278 OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
279 OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
280 OUT_RING((x2 << 0) | (y2 << 16));
281 ADVANCE_RING();
282}
283
284static inline void
285draw_auto(drm_radeon_private_t *dev_priv)
286{
287 RING_LOCALS;
288 DRM_DEBUG("\n");
289
290 BEGIN_RING(10);
291 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
292 OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
293 OUT_RING(DI_PT_RECTLIST);
294
295 OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
296 OUT_RING(DI_INDEX_SIZE_16_BIT);
297
298 OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
299 OUT_RING(1);
300
301 OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
302 OUT_RING(3);
303 OUT_RING(DI_SRC_SEL_AUTO_INDEX);
304
305 ADVANCE_RING();
306 COMMIT_RING();
307}
308
309static inline void
310set_default_state(drm_radeon_private_t *dev_priv)
311{
312 int default_state_dw, i;
313 u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
314 u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
315 int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
316 int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
317 int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
318 RING_LOCALS;
319
320 switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
321 case CHIP_R600:
322 num_ps_gprs = 192;
323 num_vs_gprs = 56;
324 num_temp_gprs = 4;
325 num_gs_gprs = 0;
326 num_es_gprs = 0;
327 num_ps_threads = 136;
328 num_vs_threads = 48;
329 num_gs_threads = 4;
330 num_es_threads = 4;
331 num_ps_stack_entries = 128;
332 num_vs_stack_entries = 128;
333 num_gs_stack_entries = 0;
334 num_es_stack_entries = 0;
335 break;
336 case CHIP_RV630:
337 case CHIP_RV635:
338 num_ps_gprs = 84;
339 num_vs_gprs = 36;
340 num_temp_gprs = 4;
341 num_gs_gprs = 0;
342 num_es_gprs = 0;
343 num_ps_threads = 144;
344 num_vs_threads = 40;
345 num_gs_threads = 4;
346 num_es_threads = 4;
347 num_ps_stack_entries = 40;
348 num_vs_stack_entries = 40;
349 num_gs_stack_entries = 32;
350 num_es_stack_entries = 16;
351 break;
352 case CHIP_RV610:
353 case CHIP_RV620:
354 case CHIP_RS780:
355 case CHIP_RS880:
356 default:
357 num_ps_gprs = 84;
358 num_vs_gprs = 36;
359 num_temp_gprs = 4;
360 num_gs_gprs = 0;
361 num_es_gprs = 0;
362 num_ps_threads = 136;
363 num_vs_threads = 48;
364 num_gs_threads = 4;
365 num_es_threads = 4;
366 num_ps_stack_entries = 40;
367 num_vs_stack_entries = 40;
368 num_gs_stack_entries = 32;
369 num_es_stack_entries = 16;
370 break;
371 case CHIP_RV670:
372 num_ps_gprs = 144;
373 num_vs_gprs = 40;
374 num_temp_gprs = 4;
375 num_gs_gprs = 0;
376 num_es_gprs = 0;
377 num_ps_threads = 136;
378 num_vs_threads = 48;
379 num_gs_threads = 4;
380 num_es_threads = 4;
381 num_ps_stack_entries = 40;
382 num_vs_stack_entries = 40;
383 num_gs_stack_entries = 32;
384 num_es_stack_entries = 16;
385 break;
386 case CHIP_RV770:
387 num_ps_gprs = 192;
388 num_vs_gprs = 56;
389 num_temp_gprs = 4;
390 num_gs_gprs = 0;
391 num_es_gprs = 0;
392 num_ps_threads = 188;
393 num_vs_threads = 60;
394 num_gs_threads = 0;
395 num_es_threads = 0;
396 num_ps_stack_entries = 256;
397 num_vs_stack_entries = 256;
398 num_gs_stack_entries = 0;
399 num_es_stack_entries = 0;
400 break;
401 case CHIP_RV730:
402 case CHIP_RV740:
403 num_ps_gprs = 84;
404 num_vs_gprs = 36;
405 num_temp_gprs = 4;
406 num_gs_gprs = 0;
407 num_es_gprs = 0;
408 num_ps_threads = 188;
409 num_vs_threads = 60;
410 num_gs_threads = 0;
411 num_es_threads = 0;
412 num_ps_stack_entries = 128;
413 num_vs_stack_entries = 128;
414 num_gs_stack_entries = 0;
415 num_es_stack_entries = 0;
416 break;
417 case CHIP_RV710:
418 num_ps_gprs = 192;
419 num_vs_gprs = 56;
420 num_temp_gprs = 4;
421 num_gs_gprs = 0;
422 num_es_gprs = 0;
423 num_ps_threads = 144;
424 num_vs_threads = 48;
425 num_gs_threads = 0;
426 num_es_threads = 0;
427 num_ps_stack_entries = 128;
428 num_vs_stack_entries = 128;
429 num_gs_stack_entries = 0;
430 num_es_stack_entries = 0;
431 break;
432 }
433
434 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
435 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
436 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
437 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
438 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
439 sq_config = 0;
440 else
441 sq_config = R600_VC_ENABLE;
442
443 sq_config |= (R600_DX9_CONSTS |
444 R600_ALU_INST_PREFER_VECTOR |
445 R600_PS_PRIO(0) |
446 R600_VS_PRIO(1) |
447 R600_GS_PRIO(2) |
448 R600_ES_PRIO(3));
449
450 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
451 R600_NUM_VS_GPRS(num_vs_gprs) |
452 R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
453 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
454 R600_NUM_ES_GPRS(num_es_gprs));
455 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
456 R600_NUM_VS_THREADS(num_vs_threads) |
457 R600_NUM_GS_THREADS(num_gs_threads) |
458 R600_NUM_ES_THREADS(num_es_threads));
459 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
460 R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
461 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
462 R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
463
464 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
465 default_state_dw = r7xx_default_size * 4;
466 BEGIN_RING(default_state_dw + 10);
467 for (i = 0; i < default_state_dw; i++)
468 OUT_RING(r7xx_default_state[i]);
469 } else {
470 default_state_dw = r6xx_default_size * 4;
471 BEGIN_RING(default_state_dw + 10);
472 for (i = 0; i < default_state_dw; i++)
473 OUT_RING(r6xx_default_state[i]);
474 }
475 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
476 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
477 /* SQ config */
478 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
479 OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
480 OUT_RING(sq_config);
481 OUT_RING(sq_gpr_resource_mgmt_1);
482 OUT_RING(sq_gpr_resource_mgmt_2);
483 OUT_RING(sq_thread_resource_mgmt);
484 OUT_RING(sq_stack_resource_mgmt_1);
485 OUT_RING(sq_stack_resource_mgmt_2);
486 ADVANCE_RING();
487}
488
/*
 * Convert an unsigned integer to the bit pattern of the IEEE-754
 * single-precision float with the same value.
 *
 * The previous implementation masked the input to 14 bits, so any value of
 * 16384 or more was silently wrapped (16384 itself became 0.0).  This version
 * accepts the full 32-bit range and yields the same bits as before for every
 * input below 16384.
 *
 * @input: value to convert
 *
 * Returns the raw float bits.  The result is exact for inputs below 2^24;
 * larger inputs do not fit the 23-bit mantissa and are truncated toward zero.
 */
static inline uint32_t i2f(uint32_t input)
{
	uint32_t msb, fraction;

	if (input == 0)
		return 0; /* 0 is a special case: all-zero bits */

	/* locate the most significant set bit */
	msb = 31;
	while (!(input & ((uint32_t)1 << msb)))
		msb--;

	/* align that bit with bit 23, the implicit leading one of the mantissa */
	if (msb > 23)
		fraction = input >> (msb - 23);
	else
		fraction = input << (23 - msb);

	/* exponent is biased by 127; mask off the implicit leading one */
	return ((127 + msb) << 23) | (fraction & 0x7fffff);
}
513
514
515int r600_nomm_get_vb(struct drm_device *dev)
516{
517 drm_radeon_private_t *dev_priv = dev->dev_private;
518 dev_priv->blit_vb = radeon_freelist_get(dev);
519 if (!dev_priv->blit_vb) {
520 DRM_ERROR("Unable to allocate vertex buffer for blit\n");
521 return -EAGAIN;
522 }
523 return 0;
524}
525
526void r600_nomm_put_vb(struct drm_device *dev)
527{
528 drm_radeon_private_t *dev_priv = dev->dev_private;
529
530 dev_priv->blit_vb->used = 0;
531 radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
532}
533
534void *r600_nomm_get_vb_ptr(struct drm_device *dev)
535{
536 drm_radeon_private_t *dev_priv = dev->dev_private;
537 return (((char *)dev->agp_buffer_map->handle +
538 dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
539}
540
541int
542r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
543{
544 drm_radeon_private_t *dev_priv = dev->dev_private;
545 DRM_DEBUG("\n");
546
547 r600_nomm_get_vb(dev);
548
549 dev_priv->blit_vb->file_priv = file_priv;
550
551 set_default_state(dev_priv);
552 set_shaders(dev);
553
554 return 0;
555}
556
557
558void
559r600_done_blit_copy(struct drm_device *dev)
560{
561 drm_radeon_private_t *dev_priv = dev->dev_private;
562 RING_LOCALS;
563 DRM_DEBUG("\n");
564
565 BEGIN_RING(5);
566 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
567 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
568 /* wait for 3D idle clean */
569 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
570 OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
571 OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
572
573 ADVANCE_RING();
574 COMMIT_RING();
575
576 r600_nomm_put_vb(dev);
577}
578
579void
580r600_blit_copy(struct drm_device *dev,
581 uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
582 int size_bytes)
583{
584 drm_radeon_private_t *dev_priv = dev->dev_private;
585 int max_bytes;
586 u64 vb_addr;
587 u32 *vb;
588
589 vb = r600_nomm_get_vb_ptr(dev);
590
591 if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
592 max_bytes = 8192;
593
594 while (size_bytes) {
595 int cur_size = size_bytes;
596 int src_x = src_gpu_addr & 255;
597 int dst_x = dst_gpu_addr & 255;
598 int h = 1;
599 src_gpu_addr = src_gpu_addr & ~255;
600 dst_gpu_addr = dst_gpu_addr & ~255;
601
602 if (!src_x && !dst_x) {
603 h = (cur_size / max_bytes);
604 if (h > 8192)
605 h = 8192;
606 if (h == 0)
607 h = 1;
608 else
609 cur_size = max_bytes;
610 } else {
611 if (cur_size > max_bytes)
612 cur_size = max_bytes;
613 if (cur_size > (max_bytes - dst_x))
614 cur_size = (max_bytes - dst_x);
615 if (cur_size > (max_bytes - src_x))
616 cur_size = (max_bytes - src_x);
617 }
618
619 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
620
621 r600_nomm_put_vb(dev);
622 r600_nomm_get_vb(dev);
623 if (!dev_priv->blit_vb)
624 return;
625 set_shaders(dev);
626 vb = r600_nomm_get_vb_ptr(dev);
627 }
628
629 vb[0] = i2f(dst_x);
630 vb[1] = 0;
631 vb[2] = i2f(src_x);
632 vb[3] = 0;
633
634 vb[4] = i2f(dst_x);
635 vb[5] = i2f(h);
636 vb[6] = i2f(src_x);
637 vb[7] = i2f(h);
638
639 vb[8] = i2f(dst_x + cur_size);
640 vb[9] = i2f(h);
641 vb[10] = i2f(src_x + cur_size);
642 vb[11] = i2f(h);
643
644 /* src */
645 set_tex_resource(dev_priv, FMT_8,
646 src_x + cur_size, h, src_x + cur_size,
647 src_gpu_addr);
648
649 cp_set_surface_sync(dev_priv,
650 R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
651
652 /* dst */
653 set_render_target(dev_priv, COLOR_8,
654 dst_x + cur_size, h,
655 dst_gpu_addr);
656
657 /* scissors */
658 set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
659
660 /* Vertex buffer setup */
661 vb_addr = dev_priv->gart_buffers_offset +
662 dev_priv->blit_vb->offset +
663 dev_priv->blit_vb->used;
664 set_vtx_resource(dev_priv, vb_addr);
665
666 /* draw */
667 draw_auto(dev_priv);
668
669 cp_set_surface_sync(dev_priv,
670 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
671 cur_size * h, dst_gpu_addr);
672
673 vb += 12;
674 dev_priv->blit_vb->used += 12 * 4;
675
676 src_gpu_addr += cur_size * h;
677 dst_gpu_addr += cur_size * h;
678 size_bytes -= cur_size * h;
679 }
680 } else {
681 max_bytes = 8192 * 4;
682
683 while (size_bytes) {
684 int cur_size = size_bytes;
685 int src_x = (src_gpu_addr & 255);
686 int dst_x = (dst_gpu_addr & 255);
687 int h = 1;
688 src_gpu_addr = src_gpu_addr & ~255;
689 dst_gpu_addr = dst_gpu_addr & ~255;
690
691 if (!src_x && !dst_x) {
692 h = (cur_size / max_bytes);
693 if (h > 8192)
694 h = 8192;
695 if (h == 0)
696 h = 1;
697 else
698 cur_size = max_bytes;
699 } else {
700 if (cur_size > max_bytes)
701 cur_size = max_bytes;
702 if (cur_size > (max_bytes - dst_x))
703 cur_size = (max_bytes - dst_x);
704 if (cur_size > (max_bytes - src_x))
705 cur_size = (max_bytes - src_x);
706 }
707
708 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
709 r600_nomm_put_vb(dev);
710 r600_nomm_get_vb(dev);
711 if (!dev_priv->blit_vb)
712 return;
713
714 set_shaders(dev);
715 vb = r600_nomm_get_vb_ptr(dev);
716 }
717
718 vb[0] = i2f(dst_x / 4);
719 vb[1] = 0;
720 vb[2] = i2f(src_x / 4);
721 vb[3] = 0;
722
723 vb[4] = i2f(dst_x / 4);
724 vb[5] = i2f(h);
725 vb[6] = i2f(src_x / 4);
726 vb[7] = i2f(h);
727
728 vb[8] = i2f((dst_x + cur_size) / 4);
729 vb[9] = i2f(h);
730 vb[10] = i2f((src_x + cur_size) / 4);
731 vb[11] = i2f(h);
732
733 /* src */
734 set_tex_resource(dev_priv, FMT_8_8_8_8,
735 (src_x + cur_size) / 4,
736 h, (src_x + cur_size) / 4,
737 src_gpu_addr);
738
739 cp_set_surface_sync(dev_priv,
740 R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
741
742 /* dst */
743 set_render_target(dev_priv, COLOR_8_8_8_8,
744 dst_x + cur_size, h,
745 dst_gpu_addr);
746
747 /* scissors */
748 set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
749
750 /* Vertex buffer setup */
751 vb_addr = dev_priv->gart_buffers_offset +
752 dev_priv->blit_vb->offset +
753 dev_priv->blit_vb->used;
754 set_vtx_resource(dev_priv, vb_addr);
755
756 /* draw */
757 draw_auto(dev_priv);
758
759 cp_set_surface_sync(dev_priv,
760 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
761 cur_size * h, dst_gpu_addr);
762
763 vb += 12;
764 dev_priv->blit_vb->used += 12 * 4;
765
766 src_gpu_addr += cur_size * h;
767 dst_gpu_addr += cur_size * h;
768 size_bytes -= cur_size * h;
769 }
770 }
771}
772
773void
774r600_blit_swap(struct drm_device *dev,
775 uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
776 int sx, int sy, int dx, int dy,
777 int w, int h, int src_pitch, int dst_pitch, int cpp)
778{
779 drm_radeon_private_t *dev_priv = dev->dev_private;
780 int cb_format, tex_format;
781 u64 vb_addr;
782 u32 *vb;
783
784 vb = (u32 *) ((char *)dev->agp_buffer_map->handle +
785 dev_priv->blit_vb->offset + dev_priv->blit_vb->used);
786
787 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
788
789 r600_nomm_put_vb(dev);
790 r600_nomm_get_vb(dev);
791 if (!dev_priv->blit_vb)
792 return;
793
794 set_shaders(dev);
795 vb = r600_nomm_get_vb_ptr(dev);
796 }
797
798 if (cpp == 4) {
799 cb_format = COLOR_8_8_8_8;
800 tex_format = FMT_8_8_8_8;
801 } else if (cpp == 2) {
802 cb_format = COLOR_5_6_5;
803 tex_format = FMT_5_6_5;
804 } else {
805 cb_format = COLOR_8;
806 tex_format = FMT_8;
807 }
808
809 vb[0] = i2f(dx);
810 vb[1] = i2f(dy);
811 vb[2] = i2f(sx);
812 vb[3] = i2f(sy);
813
814 vb[4] = i2f(dx);
815 vb[5] = i2f(dy + h);
816 vb[6] = i2f(sx);
817 vb[7] = i2f(sy + h);
818
819 vb[8] = i2f(dx + w);
820 vb[9] = i2f(dy + h);
821 vb[10] = i2f(sx + w);
822 vb[11] = i2f(sy + h);
823
824 /* src */
825 set_tex_resource(dev_priv, tex_format,
826 src_pitch / cpp,
827 sy + h, src_pitch / cpp,
828 src_gpu_addr);
829
830 cp_set_surface_sync(dev_priv,
831 R600_TC_ACTION_ENA, (src_pitch * (sy + h)), src_gpu_addr);
832
833 /* dst */
834 set_render_target(dev_priv, cb_format,
835 dst_pitch / cpp, dy + h,
836 dst_gpu_addr);
837
838 /* scissors */
839 set_scissors(dev_priv, dx, dy, dx + w, dy + h);
840
841 /* Vertex buffer setup */
842 vb_addr = dev_priv->gart_buffers_offset +
843 dev_priv->blit_vb->offset +
844 dev_priv->blit_vb->used;
845 set_vtx_resource(dev_priv, vb_addr);
846
847 /* draw */
848 draw_auto(dev_priv);
849
850 cp_set_surface_sync(dev_priv,
851 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
852 dst_pitch * (dy + h), dst_gpu_addr);
853
854 dev_priv->blit_vb->used += 12 * 4;
855}