aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlex Deucher <alexdeucher@gmail.com>2010-09-09 11:33:36 -0400
committerDave Airlie <airlied@redhat.com>2010-10-05 21:46:30 -0400
commitd7ccd8fc11700502b5a104b7bad595b492a3aa1b (patch)
treedf2a228494a959470d3efd5a7c8cde9e642aca21
parent75fa0b08e50cb72715b58321e8259c47adfe4c6f (diff)
drm/radeon/kms: add drm blit support for evergreen
This patch implements blit support for bo moves using the 3D engine. It uses the same method as r6xx/r7xx: - store the base state in an IB - emit variable state and vertex buffers to do the blit This allows the hw to move bos using the 3D engine and allows full use of vram beyond the pci aperture size. Signed-off-by: Alex Deucher <alexdeucher@gmail.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r--drivers/gpu/drm/radeon/Makefile2
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c51
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_kms.c776
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_shaders.c359
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_shaders.h35
-rw-r--r--drivers/gpu/drm/radeon/evergreend.h5
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_kms.c26
-rw-r--r--drivers/gpu/drm/radeon/radeon.h8
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.c6
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.h8
10 files changed, 1221 insertions, 55 deletions
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index aebe00875041..6cae4f2028d2 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -65,7 +65,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
65 rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \ 65 rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \
66 r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \ 66 r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \
67 r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \ 67 r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \
68 evergreen.o evergreen_cs.o 68 evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o
69 69
70radeon-$(CONFIG_COMPAT) += radeon_ioc32.o 70radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
71radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o 71radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 4c82cc830271..aee61ae24402 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2062,26 +2062,13 @@ static int evergreen_startup(struct radeon_device *rdev)
2062 return r; 2062 return r;
2063 } 2063 }
2064 evergreen_gpu_init(rdev); 2064 evergreen_gpu_init(rdev);
2065#if 0
2066 if (!rdev->r600_blit.shader_obj) {
2067 r = r600_blit_init(rdev);
2068 if (r) {
2069 DRM_ERROR("radeon: failed blitter (%d).\n", r);
2070 return r;
2071 }
2072 }
2073 2065
2074 r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); 2066 r = evergreen_blit_init(rdev);
2075 if (unlikely(r != 0))
2076 return r;
2077 r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
2078 &rdev->r600_blit.shader_gpu_addr);
2079 radeon_bo_unreserve(rdev->r600_blit.shader_obj);
2080 if (r) { 2067 if (r) {
2081 DRM_ERROR("failed to pin blit object %d\n", r); 2068 evergreen_blit_fini(rdev);
2082 return r; 2069 rdev->asic->copy = NULL;
2070 dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
2083 } 2071 }
2084#endif
2085 2072
2086 /* allocate wb buffer */ 2073 /* allocate wb buffer */
2087 r = radeon_wb_init(rdev); 2074 r = radeon_wb_init(rdev);
@@ -2139,23 +2126,43 @@ int evergreen_resume(struct radeon_device *rdev)
2139 2126
2140int evergreen_suspend(struct radeon_device *rdev) 2127int evergreen_suspend(struct radeon_device *rdev)
2141{ 2128{
2142#if 0
2143 int r; 2129 int r;
2144#endif 2130
2145 /* FIXME: we should wait for ring to be empty */ 2131 /* FIXME: we should wait for ring to be empty */
2146 r700_cp_stop(rdev); 2132 r700_cp_stop(rdev);
2147 rdev->cp.ready = false; 2133 rdev->cp.ready = false;
2148 evergreen_irq_suspend(rdev); 2134 evergreen_irq_suspend(rdev);
2149 radeon_wb_disable(rdev); 2135 radeon_wb_disable(rdev);
2150 evergreen_pcie_gart_disable(rdev); 2136 evergreen_pcie_gart_disable(rdev);
2151#if 0 2137
2152 /* unpin shaders bo */ 2138 /* unpin shaders bo */
2153 r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); 2139 r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
2154 if (likely(r == 0)) { 2140 if (likely(r == 0)) {
2155 radeon_bo_unpin(rdev->r600_blit.shader_obj); 2141 radeon_bo_unpin(rdev->r600_blit.shader_obj);
2156 radeon_bo_unreserve(rdev->r600_blit.shader_obj); 2142 radeon_bo_unreserve(rdev->r600_blit.shader_obj);
2157 } 2143 }
2158#endif 2144
2145 return 0;
2146}
2147
2148int evergreen_copy_blit(struct radeon_device *rdev,
2149 uint64_t src_offset, uint64_t dst_offset,
2150 unsigned num_pages, struct radeon_fence *fence)
2151{
2152 int r;
2153
2154 mutex_lock(&rdev->r600_blit.mutex);
2155 rdev->r600_blit.vb_ib = NULL;
2156 r = evergreen_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE);
2157 if (r) {
2158 if (rdev->r600_blit.vb_ib)
2159 radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
2160 mutex_unlock(&rdev->r600_blit.mutex);
2161 return r;
2162 }
2163 evergreen_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE);
2164 evergreen_blit_done_copy(rdev, fence);
2165 mutex_unlock(&rdev->r600_blit.mutex);
2159 return 0; 2166 return 0;
2160} 2167}
2161 2168
@@ -2286,7 +2293,7 @@ int evergreen_init(struct radeon_device *rdev)
2286 2293
2287void evergreen_fini(struct radeon_device *rdev) 2294void evergreen_fini(struct radeon_device *rdev)
2288{ 2295{
2289 /*r600_blit_fini(rdev);*/ 2296 evergreen_blit_fini(rdev);
2290 r700_cp_fini(rdev); 2297 r700_cp_fini(rdev);
2291 r600_irq_fini(rdev); 2298 r600_irq_fini(rdev);
2292 radeon_wb_fini(rdev); 2299 radeon_wb_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
new file mode 100644
index 000000000000..ce1ae4a2aa54
--- /dev/null
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -0,0 +1,776 @@
1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Alex Deucher <alexander.deucher@amd.com>
25 */
26
27#include "drmP.h"
28#include "drm.h"
29#include "radeon_drm.h"
30#include "radeon.h"
31
32#include "evergreend.h"
33#include "evergreen_blit_shaders.h"
34
/* Draw-initiator values emitted by draw_auto() */
35#define DI_PT_RECTLIST 0x11
36#define DI_INDEX_SIZE_16_BIT 0x0
37#define DI_SRC_SEL_AUTO_INDEX 0x2
38
/* Texture format codes handed to set_tex_resource() (FMT_*) and colour
 * buffer format codes handed to set_render_target() (COLOR_*).
 */
39#define FMT_8 0x1
40#define FMT_5_6_5 0x8
41#define FMT_8_8_8_8 0x1a
42#define COLOR_8 0x1
43#define COLOR_5_6_5 0x8
44#define COLOR_8_8_8_8 0x1a
45
46/* emits 17 */
47static void
48set_render_target(struct radeon_device *rdev, int format,
49 int w, int h, u64 gpu_addr)
50{
51 u32 cb_color_info;
52 int pitch, slice;
53
54 h = ALIGN(h, 8);
55 if (h < 8)
56 h = 8;
57
58 cb_color_info = ((format << 2) | (1 << 24));
59 pitch = (w / 8) - 1;
60 slice = ((w * h) / 64) - 1;
61
62 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 15));
63 radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2);
64 radeon_ring_write(rdev, gpu_addr >> 8);
65 radeon_ring_write(rdev, pitch);
66 radeon_ring_write(rdev, slice);
67 radeon_ring_write(rdev, 0);
68 radeon_ring_write(rdev, cb_color_info);
69 radeon_ring_write(rdev, (1 << 4));
70 radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16));
71 radeon_ring_write(rdev, 0);
72 radeon_ring_write(rdev, 0);
73 radeon_ring_write(rdev, 0);
74 radeon_ring_write(rdev, 0);
75 radeon_ring_write(rdev, 0);
76 radeon_ring_write(rdev, 0);
77 radeon_ring_write(rdev, 0);
78 radeon_ring_write(rdev, 0);
79}
80
81/* emits 5dw */
82static void
83cp_set_surface_sync(struct radeon_device *rdev,
84 u32 sync_type, u32 size,
85 u64 mc_addr)
86{
87 u32 cp_coher_size;
88
89 if (size == 0xffffffff)
90 cp_coher_size = 0xffffffff;
91 else
92 cp_coher_size = ((size + 255) >> 8);
93
94 radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
95 radeon_ring_write(rdev, sync_type);
96 radeon_ring_write(rdev, cp_coher_size);
97 radeon_ring_write(rdev, mc_addr >> 8);
98 radeon_ring_write(rdev, 10); /* poll interval */
99}
100
101/* emits 11dw + 1 surface sync = 16dw */
102static void
103set_shaders(struct radeon_device *rdev)
104{
105 u64 gpu_addr;
106
107 /* VS */
108 gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
109 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 3));
110 radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2);
111 radeon_ring_write(rdev, gpu_addr >> 8);
112 radeon_ring_write(rdev, 2);
113 radeon_ring_write(rdev, 0);
114
115 /* PS */
116 gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
117 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 4));
118 radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2);
119 radeon_ring_write(rdev, gpu_addr >> 8);
120 radeon_ring_write(rdev, 1);
121 radeon_ring_write(rdev, 0);
122 radeon_ring_write(rdev, 2);
123
124 gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
125 cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
126}
127
128/* emits 10 + 1 sync (5) = 15 */
129static void
130set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
131{
132 u32 sq_vtx_constant_word2, sq_vtx_constant_word3;
133
134 /* high addr, stride */
135 sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
136 /* xyzw swizzles */
137 sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12);
138
139 radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
140 radeon_ring_write(rdev, 0x580);
141 radeon_ring_write(rdev, gpu_addr & 0xffffffff);
142 radeon_ring_write(rdev, 48 - 1); /* size */
143 radeon_ring_write(rdev, sq_vtx_constant_word2);
144 radeon_ring_write(rdev, sq_vtx_constant_word3);
145 radeon_ring_write(rdev, 0);
146 radeon_ring_write(rdev, 0);
147 radeon_ring_write(rdev, 0);
148 radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);
149
150 if (rdev->family == CHIP_CEDAR)
151 cp_set_surface_sync(rdev,
152 PACKET3_TC_ACTION_ENA, 48, gpu_addr);
153 else
154 cp_set_surface_sync(rdev,
155 PACKET3_VC_ACTION_ENA, 48, gpu_addr);
156
157}
158
159/* emits 10 */
160static void
161set_tex_resource(struct radeon_device *rdev,
162 int format, int w, int h, int pitch,
163 u64 gpu_addr)
164{
165 u32 sq_tex_resource_word0, sq_tex_resource_word1;
166 u32 sq_tex_resource_word4, sq_tex_resource_word7;
167
168 if (h < 1)
169 h = 1;
170
171 sq_tex_resource_word0 = (1 << 0); /* 2D */
172 sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |
173 ((w - 1) << 18));
174 sq_tex_resource_word1 = ((h - 1) << 0);
175 /* xyzw swizzles */
176 sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25);
177
178 sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30);
179
180 radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
181 radeon_ring_write(rdev, 0);
182 radeon_ring_write(rdev, sq_tex_resource_word0);
183 radeon_ring_write(rdev, sq_tex_resource_word1);
184 radeon_ring_write(rdev, gpu_addr >> 8);
185 radeon_ring_write(rdev, gpu_addr >> 8);
186 radeon_ring_write(rdev, sq_tex_resource_word4);
187 radeon_ring_write(rdev, 0);
188 radeon_ring_write(rdev, 0);
189 radeon_ring_write(rdev, sq_tex_resource_word7);
190}
191
192/* emits 12 */
193static void
194set_scissors(struct radeon_device *rdev, int x1, int y1,
195 int x2, int y2)
196{
197 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
198 radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
199 radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));
200 radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
201
202 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
203 radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
204 radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
205 radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
206
207 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
208 radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
209 radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
210 radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
211}
212
213/* emits 10 */
214static void
215draw_auto(struct radeon_device *rdev)
216{
217 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
218 radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2);
219 radeon_ring_write(rdev, DI_PT_RECTLIST);
220
221 radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
222 radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT);
223
224 radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
225 radeon_ring_write(rdev, 1);
226
227 radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
228 radeon_ring_write(rdev, 3);
229 radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX);
230
231}
232
233/* emits 20 */
234static void
235set_default_state(struct radeon_device *rdev)
236{
237 u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
238 u32 sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
239 u32 sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
240 int num_ps_gprs, num_vs_gprs, num_temp_gprs;
241 int num_gs_gprs, num_es_gprs, num_hs_gprs, num_ls_gprs;
242 int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
243 int num_hs_threads, num_ls_threads;
244 int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
245 int num_hs_stack_entries, num_ls_stack_entries;
246 u64 gpu_addr;
247 int dwords;
248
249 switch (rdev->family) {
250 case CHIP_CEDAR:
251 default:
252 num_ps_gprs = 93;
253 num_vs_gprs = 46;
254 num_temp_gprs = 4;
255 num_gs_gprs = 31;
256 num_es_gprs = 31;
257 num_hs_gprs = 23;
258 num_ls_gprs = 23;
259 num_ps_threads = 96;
260 num_vs_threads = 16;
261 num_gs_threads = 16;
262 num_es_threads = 16;
263 num_hs_threads = 16;
264 num_ls_threads = 16;
265 num_ps_stack_entries = 42;
266 num_vs_stack_entries = 42;
267 num_gs_stack_entries = 42;
268 num_es_stack_entries = 42;
269 num_hs_stack_entries = 42;
270 num_ls_stack_entries = 42;
271 break;
272 case CHIP_REDWOOD:
273 num_ps_gprs = 93;
274 num_vs_gprs = 46;
275 num_temp_gprs = 4;
276 num_gs_gprs = 31;
277 num_es_gprs = 31;
278 num_hs_gprs = 23;
279 num_ls_gprs = 23;
280 num_ps_threads = 128;
281 num_vs_threads = 20;
282 num_gs_threads = 20;
283 num_es_threads = 20;
284 num_hs_threads = 20;
285 num_ls_threads = 20;
286 num_ps_stack_entries = 42;
287 num_vs_stack_entries = 42;
288 num_gs_stack_entries = 42;
289 num_es_stack_entries = 42;
290 num_hs_stack_entries = 42;
291 num_ls_stack_entries = 42;
292 break;
293 case CHIP_JUNIPER:
294 num_ps_gprs = 93;
295 num_vs_gprs = 46;
296 num_temp_gprs = 4;
297 num_gs_gprs = 31;
298 num_es_gprs = 31;
299 num_hs_gprs = 23;
300 num_ls_gprs = 23;
301 num_ps_threads = 128;
302 num_vs_threads = 20;
303 num_gs_threads = 20;
304 num_es_threads = 20;
305 num_hs_threads = 20;
306 num_ls_threads = 20;
307 num_ps_stack_entries = 85;
308 num_vs_stack_entries = 85;
309 num_gs_stack_entries = 85;
310 num_es_stack_entries = 85;
311 num_hs_stack_entries = 85;
312 num_ls_stack_entries = 85;
313 break;
314 case CHIP_CYPRESS:
315 case CHIP_HEMLOCK:
316 num_ps_gprs = 93;
317 num_vs_gprs = 46;
318 num_temp_gprs = 4;
319 num_gs_gprs = 31;
320 num_es_gprs = 31;
321 num_hs_gprs = 23;
322 num_ls_gprs = 23;
323 num_ps_threads = 128;
324 num_vs_threads = 20;
325 num_gs_threads = 20;
326 num_es_threads = 20;
327 num_hs_threads = 20;
328 num_ls_threads = 20;
329 num_ps_stack_entries = 85;
330 num_vs_stack_entries = 85;
331 num_gs_stack_entries = 85;
332 num_es_stack_entries = 85;
333 num_hs_stack_entries = 85;
334 num_ls_stack_entries = 85;
335 break;
336 }
337
338 if (rdev->family == CHIP_CEDAR)
339 sq_config = 0;
340 else
341 sq_config = VC_ENABLE;
342
343 sq_config |= (EXPORT_SRC_C |
344 CS_PRIO(0) |
345 LS_PRIO(0) |
346 HS_PRIO(0) |
347 PS_PRIO(0) |
348 VS_PRIO(1) |
349 GS_PRIO(2) |
350 ES_PRIO(3));
351
352 sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
353 NUM_VS_GPRS(num_vs_gprs) |
354 NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
355 sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
356 NUM_ES_GPRS(num_es_gprs));
357 sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
358 NUM_LS_GPRS(num_ls_gprs));
359 sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
360 NUM_VS_THREADS(num_vs_threads) |
361 NUM_GS_THREADS(num_gs_threads) |
362 NUM_ES_THREADS(num_es_threads));
363 sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
364 NUM_LS_THREADS(num_ls_threads));
365 sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
366 NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
367 sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
368 NUM_ES_STACK_ENTRIES(num_es_stack_entries));
369 sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
370 NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
371
372 /* emit an IB pointing at default state */
373 dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
374 gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
375 radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
376 radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC);
377 radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
378 radeon_ring_write(rdev, dwords);
379
380 /* disable dyn gprs */
381 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
382 radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
383 radeon_ring_write(rdev, 0);
384
385 /* SQ config */
386 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
387 radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
388 radeon_ring_write(rdev, sq_config);
389 radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
390 radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
391 radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
392 radeon_ring_write(rdev, 0);
393 radeon_ring_write(rdev, 0);
394 radeon_ring_write(rdev, sq_thread_resource_mgmt);
395 radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
396 radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
397 radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
398 radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
399}
400
/*
 * i2f - convert a small unsigned integer to IEEE-754 single-precision bits
 * @input: value to convert; only the low 14 bits are used, so inputs of
 *         2^14 and above wrap
 *
 * Returns the 32-bit float encoding of (input & 0x3fff). Zero maps to an
 * all-zero word.
 */
static inline uint32_t i2f(uint32_t input)
{
	/* place the 14-bit value so that its bit 13 lands on bit 23 */
	uint32_t mantissa = (input & 0x3fff) << 10;
	/* biased exponent for a value whose top set bit is bit 13: 127 + 13 */
	uint32_t biased_exp = 140;

	if (mantissa == 0)
		return 0; /* 0 is a special case */

	/* normalise: shift left until the implicit leading 1 is at bit 23 */
	while (!(mantissa & 0x800000)) {
		mantissa <<= 1;
		biased_exp--;
	}

	/* drop the implicit leading 1 from the stored fraction */
	return (biased_exp << 23) | (mantissa & 0x7fffff);
}
425
426int evergreen_blit_init(struct radeon_device *rdev)
427{
428 u32 obj_size;
429 int r, dwords;
430 void *ptr;
431 u32 packet2s[16];
432 int num_packet2s = 0;
433
434 /* pin copy shader into vram if already initialized */
435 if (rdev->r600_blit.shader_obj)
436 goto done;
437
438 mutex_init(&rdev->r600_blit.mutex);
439 rdev->r600_blit.state_offset = 0;
440
441 rdev->r600_blit.state_len = evergreen_default_size;
442
443 dwords = rdev->r600_blit.state_len;
444 while (dwords & 0xf) {
445 packet2s[num_packet2s++] = PACKET2(0);
446 dwords++;
447 }
448
449 obj_size = dwords * 4;
450 obj_size = ALIGN(obj_size, 256);
451
452 rdev->r600_blit.vs_offset = obj_size;
453 obj_size += evergreen_vs_size * 4;
454 obj_size = ALIGN(obj_size, 256);
455
456 rdev->r600_blit.ps_offset = obj_size;
457 obj_size += evergreen_ps_size * 4;
458 obj_size = ALIGN(obj_size, 256);
459
460 r = radeon_bo_create(rdev, NULL, obj_size, true, RADEON_GEM_DOMAIN_VRAM,
461 &rdev->r600_blit.shader_obj);
462 if (r) {
463 DRM_ERROR("evergreen failed to allocate shader\n");
464 return r;
465 }
466
467 DRM_DEBUG("evergreen blit allocated bo %08x vs %08x ps %08x\n",
468 obj_size,
469 rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);
470
471 r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
472 if (unlikely(r != 0))
473 return r;
474 r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr);
475 if (r) {
476 DRM_ERROR("failed to map blit object %d\n", r);
477 return r;
478 }
479
480 memcpy_toio(ptr + rdev->r600_blit.state_offset,
481 evergreen_default_state, rdev->r600_blit.state_len * 4);
482
483 if (num_packet2s)
484 memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
485 packet2s, num_packet2s * 4);
486 memcpy(ptr + rdev->r600_blit.vs_offset, evergreen_vs, evergreen_vs_size * 4);
487 memcpy(ptr + rdev->r600_blit.ps_offset, evergreen_ps, evergreen_ps_size * 4);
488 radeon_bo_kunmap(rdev->r600_blit.shader_obj);
489 radeon_bo_unreserve(rdev->r600_blit.shader_obj);
490
491done:
492 r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
493 if (unlikely(r != 0))
494 return r;
495 r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
496 &rdev->r600_blit.shader_gpu_addr);
497 radeon_bo_unreserve(rdev->r600_blit.shader_obj);
498 if (r) {
499 dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
500 return r;
501 }
502 return 0;
503}
504
505void evergreen_blit_fini(struct radeon_device *rdev)
506{
507 int r;
508
509 if (rdev->r600_blit.shader_obj == NULL)
510 return;
511 /* If we can't reserve the bo, unref should be enough to destroy
512 * it when it becomes idle.
513 */
514 r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
515 if (!r) {
516 radeon_bo_unpin(rdev->r600_blit.shader_obj);
517 radeon_bo_unreserve(rdev->r600_blit.shader_obj);
518 }
519 radeon_bo_unref(&rdev->r600_blit.shader_obj);
520}
521
522static int evergreen_vb_ib_get(struct radeon_device *rdev)
523{
524 int r;
525 r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
526 if (r) {
527 DRM_ERROR("failed to get IB for vertex buffer\n");
528 return r;
529 }
530
531 rdev->r600_blit.vb_total = 64*1024;
532 rdev->r600_blit.vb_used = 0;
533 return 0;
534}
535
536static void evergreen_vb_ib_put(struct radeon_device *rdev)
537{
538 radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
539 radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
540}
541
/*
 * evergreen_blit_prepare_copy - reserve ring space and emit the fixed state
 * @rdev: radeon device
 * @size_bytes: number of bytes the following copy will move
 *
 * Grabs the vertex buffer IB, estimates how many draw loops the copy
 * needs (74 ring dwords each), locks that much ring space plus the
 * fixed overhead, and emits the default state and shader setup.
 *
 * Returns 0 on success or a negative error code. When the ring lock
 * fails the vertex buffer IB obtained here is still held in
 * rdev->r600_blit.vb_ib.
 *
 * NOTE(review): the 8bpp/32bpp choice here looks only at @size_bytes,
 * whereas evergreen_kms_blit_copy() also checks the alignment of both
 * addresses.
 */
int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
{
	/* loops of emits + fence emit possible */
	const int dwords_per_loop = 74;
	int line_size, max_size;
	int num_loops, ring_size;
	int r;

	r = evergreen_vb_ib_get(rdev);
	if (r)
		return r;

	/* 8 bpp vs 32 bpp for xfer unit */
	line_size = (size_bytes & 3) ? 8192 : 8192 * 4;
	max_size = 8192 * line_size;

	/* major loops cover the max size transfer */
	num_loops = (size_bytes + max_size) / max_size;
	/* minor loops cover the extra non aligned bits */
	if (size_bytes % line_size)
		num_loops += 1;

	ring_size = num_loops * dwords_per_loop
		+ 36	/* shaders + def state */
		+ 10	/* fence emit for VB IB */
		+ 5	/* done copy */
		+ 10;	/* fence emit for done copy */

	r = radeon_ring_lock(rdev, ring_size);
	if (r)
		return r;

	set_default_state(rdev); /* 20 */
	set_shaders(rdev); /* 16 */
	return 0;
}
581
582void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
583{
584 int r;
585
586 if (rdev->r600_blit.vb_ib)
587 evergreen_vb_ib_put(rdev);
588
589 if (fence)
590 r = radeon_fence_emit(rdev, fence);
591
592 radeon_ring_unlock_commit(rdev);
593}
594
595void evergreen_kms_blit_copy(struct radeon_device *rdev,
596 u64 src_gpu_addr, u64 dst_gpu_addr,
597 int size_bytes)
598{
599 int max_bytes;
600 u64 vb_gpu_addr;
601 u32 *vb;
602
603 DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
604 size_bytes, rdev->r600_blit.vb_used);
605 vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
606 if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
607 max_bytes = 8192;
608
609 while (size_bytes) {
610 int cur_size = size_bytes;
611 int src_x = src_gpu_addr & 255;
612 int dst_x = dst_gpu_addr & 255;
613 int h = 1;
614 src_gpu_addr = src_gpu_addr & ~255;
615 dst_gpu_addr = dst_gpu_addr & ~255;
616
617 if (!src_x && !dst_x) {
618 h = (cur_size / max_bytes);
619 if (h > 8192)
620 h = 8192;
621 if (h == 0)
622 h = 1;
623 else
624 cur_size = max_bytes;
625 } else {
626 if (cur_size > max_bytes)
627 cur_size = max_bytes;
628 if (cur_size > (max_bytes - dst_x))
629 cur_size = (max_bytes - dst_x);
630 if (cur_size > (max_bytes - src_x))
631 cur_size = (max_bytes - src_x);
632 }
633
634 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
635 WARN_ON(1);
636 }
637
638 vb[0] = i2f(dst_x);
639 vb[1] = 0;
640 vb[2] = i2f(src_x);
641 vb[3] = 0;
642
643 vb[4] = i2f(dst_x);
644 vb[5] = i2f(h);
645 vb[6] = i2f(src_x);
646 vb[7] = i2f(h);
647
648 vb[8] = i2f(dst_x + cur_size);
649 vb[9] = i2f(h);
650 vb[10] = i2f(src_x + cur_size);
651 vb[11] = i2f(h);
652
653 /* src 10 */
654 set_tex_resource(rdev, FMT_8,
655 src_x + cur_size, h, src_x + cur_size,
656 src_gpu_addr);
657
658 /* 5 */
659 cp_set_surface_sync(rdev,
660 PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
661
662
663 /* dst 17 */
664 set_render_target(rdev, COLOR_8,
665 dst_x + cur_size, h,
666 dst_gpu_addr);
667
668 /* scissors 12 */
669 set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
670
671 /* 15 */
672 vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
673 set_vtx_resource(rdev, vb_gpu_addr);
674
675 /* draw 10 */
676 draw_auto(rdev);
677
678 /* 5 */
679 cp_set_surface_sync(rdev,
680 PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
681 cur_size * h, dst_gpu_addr);
682
683 vb += 12;
684 rdev->r600_blit.vb_used += 12 * 4;
685
686 src_gpu_addr += cur_size * h;
687 dst_gpu_addr += cur_size * h;
688 size_bytes -= cur_size * h;
689 }
690 } else {
691 max_bytes = 8192 * 4;
692
693 while (size_bytes) {
694 int cur_size = size_bytes;
695 int src_x = (src_gpu_addr & 255);
696 int dst_x = (dst_gpu_addr & 255);
697 int h = 1;
698 src_gpu_addr = src_gpu_addr & ~255;
699 dst_gpu_addr = dst_gpu_addr & ~255;
700
701 if (!src_x && !dst_x) {
702 h = (cur_size / max_bytes);
703 if (h > 8192)
704 h = 8192;
705 if (h == 0)
706 h = 1;
707 else
708 cur_size = max_bytes;
709 } else {
710 if (cur_size > max_bytes)
711 cur_size = max_bytes;
712 if (cur_size > (max_bytes - dst_x))
713 cur_size = (max_bytes - dst_x);
714 if (cur_size > (max_bytes - src_x))
715 cur_size = (max_bytes - src_x);
716 }
717
718 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
719 WARN_ON(1);
720 }
721
722 vb[0] = i2f(dst_x / 4);
723 vb[1] = 0;
724 vb[2] = i2f(src_x / 4);
725 vb[3] = 0;
726
727 vb[4] = i2f(dst_x / 4);
728 vb[5] = i2f(h);
729 vb[6] = i2f(src_x / 4);
730 vb[7] = i2f(h);
731
732 vb[8] = i2f((dst_x + cur_size) / 4);
733 vb[9] = i2f(h);
734 vb[10] = i2f((src_x + cur_size) / 4);
735 vb[11] = i2f(h);
736
737 /* src 10 */
738 set_tex_resource(rdev, FMT_8_8_8_8,
739 (src_x + cur_size) / 4,
740 h, (src_x + cur_size) / 4,
741 src_gpu_addr);
742 /* 5 */
743 cp_set_surface_sync(rdev,
744 PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
745
746 /* dst 17 */
747 set_render_target(rdev, COLOR_8_8_8_8,
748 (dst_x + cur_size) / 4, h,
749 dst_gpu_addr);
750
751 /* scissors 12 */
752 set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
753
754 /* Vertex buffer setup 15 */
755 vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
756 set_vtx_resource(rdev, vb_gpu_addr);
757
758 /* draw 10 */
759 draw_auto(rdev);
760
761 /* 5 */
762 cp_set_surface_sync(rdev,
763 PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
764 cur_size * h, dst_gpu_addr);
765
766 /* 74 ring dwords per loop */
767 vb += 12;
768 rdev->r600_blit.vb_used += 12 * 4;
769
770 src_gpu_addr += cur_size * h;
771 dst_gpu_addr += cur_size * h;
772 size_bytes -= cur_size * h;
773 }
774 }
775}
776
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_shaders.c b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c
new file mode 100644
index 000000000000..5d5045027b46
--- /dev/null
+++ b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c
@@ -0,0 +1,359 @@
1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Alex Deucher <alexander.deucher@amd.com>
25 */
26
27#include <linux/types.h>
28#include <linux/kernel.h>
29
30/*
31 * evergreen cards need to use the 3D engine to blit data which requires
32 * quite a bit of hw state setup. Rather than pull the whole 3D driver
33 * (which normally generates the 3D state) into the DRM, we opt to use
34 * statically generated state tables. The register state and shaders
35 * were hand generated to support blitting functionality. See the 3D
36 * driver or documentation for descriptions of the registers and
37 * shader instructions.
38 */
39
40const u32 evergreen_default_state[] = /* statically generated hw state for the blit path, kept as a flat list of raw dwords */
41{ /* layout, as observable from the table itself: each group opens with a 0xc0NNxxxx dword and NN + 1 further dwords follow it; in the 0x..69.. groups the first of those is presumably a register offset and the remaining NN are the values named in the comments (TODO confirm) */
42 0xc0012800, /* CONTEXT_CONTROL */
43 0x80000000,
44 0x80000000,
45
46 0xc0016900,
47 0x0000023b,
48 0x00000000, /* SQ_LDS_ALLOC_PS */
49
50 0xc0066900,
51 0x00000240,
52 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */
53 0x00000000,
54 0x00000000,
55 0x00000000,
56 0x00000000,
57 0x00000000,
58
59 0xc0046900,
60 0x00000247,
61 0x00000000, /* SQ_GS_VERT_ITEMSIZE */
62 0x00000000,
63 0x00000000,
64 0x00000000,
65
66 0xc0026f00,
67 0x00000000,
68 0x00000000, /* SQ_VTX_BASE_VTX_LOC */
69 0x00000000,
70
71 0xc0026900,
72 0x00000010,
73 0x00000000, /* DB_Z_INFO */
74 0x00000000, /* DB_STENCIL_INFO */
75
76
77 0xc0016900,
78 0x00000200,
79 0x00000000, /* DB_DEPTH_CONTROL */
80
81 0xc0066900,
82 0x00000000,
83 0x00000060, /* DB_RENDER_CONTROL */
84 0x00000000, /* DB_COUNT_CONTROL */
85 0x00000000, /* DB_DEPTH_VIEW */
86 0x0000002a, /* DB_RENDER_OVERRIDE */
87 0x00000000, /* DB_RENDER_OVERRIDE2 */
88 0x00000000, /* DB_HTILE_DATA_BASE */
89
90 0xc0026900,
91 0x0000000a,
92 0x00000000, /* DB_STENCIL_CLEAR */
93 0x00000000, /* DB_DEPTH_CLEAR */
94
95 0xc0016900,
96 0x000002dc,
97 0x0000aa00, /* DB_ALPHA_TO_MASK */
98
99 0xc0016900,
100 0x00000080,
101 0x00000000, /* PA_SC_WINDOW_OFFSET */
102
103 0xc00d6900, /* NN = 13 */
104 0x00000083,
105 0x0000ffff, /* PA_SC_CLIPRECT_RULE */
106 0x00000000, /* PA_SC_CLIPRECT_0_TL */
107 0x20002000, /* PA_SC_CLIPRECT_0_BR */
108 0x00000000,
109 0x20002000,
110 0x00000000,
111 0x20002000,
112 0x00000000,
113 0x20002000,
114 0xaaaaaaaa, /* PA_SC_EDGERULE */
115 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */
116 0x0000000f, /* CB_TARGET_MASK */
117 0x0000000f, /* CB_SHADER_MASK */
118
119 0xc0226900, /* NN = 34: 16 TL/BR pairs plus the two Z range values */
120 0x00000094,
121 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */
122 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */
123 0x80000000,
124 0x20002000,
125 0x80000000,
126 0x20002000,
127 0x80000000,
128 0x20002000,
129 0x80000000,
130 0x20002000,
131 0x80000000,
132 0x20002000,
133 0x80000000,
134 0x20002000,
135 0x80000000,
136 0x20002000,
137 0x80000000,
138 0x20002000,
139 0x80000000,
140 0x20002000,
141 0x80000000,
142 0x20002000,
143 0x80000000,
144 0x20002000,
145 0x80000000,
146 0x20002000,
147 0x80000000,
148 0x20002000,
149 0x80000000,
150 0x20002000,
151 0x80000000,
152 0x20002000,
153 0x00000000, /* PA_SC_VPORT_ZMIN_0 */
154 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */
155
156 0xc0016900,
157 0x000000d4,
158 0x00000000, /* SX_MISC */
159
160 0xc0026900,
161 0x00000292,
162 0x00000000, /* PA_SC_MODE_CNTL_0 */
163 0x00000000, /* PA_SC_MODE_CNTL_1 */
164
165 0xc0106900, /* NN = 16 */
166 0x00000300,
167 0x00000000, /* PA_SC_LINE_CNTL */
168 0x00000000, /* PA_SC_AA_CONFIG */
169 0x00000005, /* PA_SU_VTX_CNTL */
170 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */
171 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */
172 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */
173 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */
174 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_0 */
175 0x00000000, /* */
176 0x00000000, /* */
177 0x00000000, /* */
178 0x00000000, /* */
179 0x00000000, /* */
180 0x00000000, /* */
181 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_7 */
182 0xffffffff, /* PA_SC_AA_MASK */
183
184 0xc00d6900, /* NN = 13 */
185 0x00000202,
186 0x00cc0010, /* CB_COLOR_CONTROL */
187 0x00000210, /* DB_SHADER_CONTROL */
188 0x00010000, /* PA_CL_CLIP_CNTL */
189 0x00000004, /* PA_SU_SC_MODE_CNTL */
190 0x00000100, /* PA_CL_VTE_CNTL */
191 0x00000000, /* PA_CL_VS_OUT_CNTL */
192 0x00000000, /* PA_CL_NANINF_CNTL */
193 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */
194 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */
195 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */
196 0x00000000, /* */
197 0x00000000, /* */
198 0x00000000, /* SQ_DYN_GPR_RESOURCE_LIMIT_1 */
199
200 0xc0066900,
201 0x000002de,
202 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */
203 0x00000000, /* */
204 0x00000000, /* */
205 0x00000000, /* */
206 0x00000000, /* */
207 0x00000000, /* */
208
209 0xc0016900,
210 0x00000229,
211 0x00000000, /* SQ_PGM_START_FS */
212
213 0xc0016900,
214 0x0000022a,
215 0x00000000, /* SQ_PGM_RESOURCES_FS */
216
217 0xc0096900,
218 0x00000100,
219 0x00ffffff, /* VGT_MAX_VTX_INDX */
220 0x00000000, /* */
221 0x00000000, /* */
222 0x00000000, /* */
223 0x00000000, /* SX_ALPHA_TEST_CONTROL */
224 0x00000000, /* CB_BLEND_RED */
225 0x00000000, /* CB_BLEND_GREEN */
226 0x00000000, /* CB_BLEND_BLUE */
227 0x00000000, /* CB_BLEND_ALPHA */
228
229 0xc0026900,
230 0x000002a8,
231 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */
232 0x00000000, /* */
233
234 0xc0026900,
235 0x000002ad,
236 0x00000000, /* VGT_REUSE_OFF */
237 0x00000000, /* */
238
239 0xc0116900, /* NN = 17 */
240 0x00000280,
241 0x00000000, /* PA_SU_POINT_SIZE */
242 0x00000000, /* PA_SU_POINT_MINMAX */
243 0x00000008, /* PA_SU_LINE_CNTL */
244 0x00000000, /* PA_SC_LINE_STIPPLE */
245 0x00000000, /* VGT_OUTPUT_PATH_CNTL */
246 0x00000000, /* VGT_HOS_CNTL */
247 0x00000000, /* */
248 0x00000000, /* */
249 0x00000000, /* */
250 0x00000000, /* */
251 0x00000000, /* */
252 0x00000000, /* */
253 0x00000000, /* */
254 0x00000000, /* */
255 0x00000000, /* */
256 0x00000000, /* */
257 0x00000000, /* VGT_GS_MODE */
258
259 0xc0016900,
260 0x000002a1,
261 0x00000000, /* VGT_PRIMITIVEID_EN */
262
263 0xc0016900,
264 0x000002a5,
265 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */
266
267 0xc0016900,
268 0x000002d5,
269 0x00000000, /* VGT_SHADER_STAGES_EN */
270
271 0xc0026900,
272 0x000002e5,
273 0x00000000, /* VGT_STRMOUT_CONFIG */
274 0x00000000, /* */
275
276 0xc0016900,
277 0x000001e0,
278 0x00000000, /* CB_BLEND0_CONTROL */
279
280 0xc0016900,
281 0x000001b1,
282 0x00000000, /* SPI_VS_OUT_CONFIG */
283
284 0xc0016900,
285 0x00000187,
286 0x00000000, /* SPI_VS_OUT_ID_0 */
287
288 0xc0016900,
289 0x00000191,
290 0x00000100, /* SPI_PS_INPUT_CNTL_0 */
291
292 0xc00b6900, /* NN = 11 */
293 0x000001b3,
294 0x20000001, /* SPI_PS_IN_CONTROL_0 */
295 0x00000000, /* SPI_PS_IN_CONTROL_1 */
296 0x00000000, /* SPI_INTERP_CONTROL_0 */
297 0x00000000, /* SPI_INPUT_Z */
298 0x00000000, /* SPI_FOG_CNTL */
299 0x00100000, /* SPI_BARYC_CNTL */
300 0x00000000, /* SPI_PS_IN_CONTROL_2 */
301 0x00000000, /* */
302 0x00000000, /* */
303 0x00000000, /* */
304 0x00000000, /* */
305
306 0xc0036e00, /* SET_SAMPLER */
307 0x00000000,
308 0x00000012,
309 0x00000000,
310 0x00000000,
311};
312
313const u32 evergreen_vs[] = /* hand-generated shader words for the blit; "vs" presumably means vertex shader (TODO confirm) */
314{ /* 16 opaque dwords; see the 3D driver or hw docs for the instruction encoding */
315 0x00000004,
316 0x80800400,
317 0x0000a03c,
318 0x95000688,
319 0x00004000,
320 0x15200688,
321 0x00000000,
322 0x00000000,
323 0x3c000000,
324 0x67961001,
325 0x00080000,
326 0x00000000,
327 0x1c000000,
328 0x67961000,
329 0x00000008,
330 0x00000000,
331};
332
333const u32 evergreen_ps[] = /* hand-generated shader words for the blit; "ps" presumably means pixel shader (TODO confirm) */
334{ /* 20 opaque dwords; see the 3D driver or hw docs for the instruction encoding */
335 0x00000003,
336 0xa00c0000,
337 0x00000008,
338 0x80400000,
339 0x00000000,
340 0x95200688,
341 0x00380400,
342 0x00146b10,
343 0x00380000,
344 0x20146b10,
345 0x00380400,
346 0x40146b00,
347 0x80380000,
348 0x60146b00,
349 0x00000000,
350 0x00000000,
351 0x00000010,
352 0x000d1000,
353 0xb0800000,
354 0x00000000,
355};
356
357const u32 evergreen_ps_size = ARRAY_SIZE(evergreen_ps); /* number of u32 entries in evergreen_ps, not a byte size */
358const u32 evergreen_vs_size = ARRAY_SIZE(evergreen_vs); /* number of u32 entries in evergreen_vs, not a byte size */
359const u32 evergreen_default_size = ARRAY_SIZE(evergreen_default_state); /* number of u32 entries in evergreen_default_state */
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_shaders.h b/drivers/gpu/drm/radeon/evergreen_blit_shaders.h
new file mode 100644
index 000000000000..bb8d6c751595
--- /dev/null
+++ b/drivers/gpu/drm/radeon/evergreen_blit_shaders.h
@@ -0,0 +1,35 @@
1/*
2 * Copyright 2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25#ifndef EVERGREEN_BLIT_SHADERS_H
26#define EVERGREEN_BLIT_SHADERS_H
27
28extern const u32 evergreen_ps[]; /* shader words, defined in evergreen_blit_shaders.c */
29extern const u32 evergreen_vs[]; /* shader words, defined in evergreen_blit_shaders.c */
30extern const u32 evergreen_default_state[]; /* default state table, defined in evergreen_blit_shaders.c */
31
32extern const u32 evergreen_ps_size, evergreen_vs_size; /* ARRAY_SIZE() of the matching arrays: entry counts, not bytes */
33extern const u32 evergreen_default_size; /* ARRAY_SIZE(evergreen_default_state): entry count, not bytes */
34
35#endif
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 9b7532dd30f7..319aa9752d40 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -802,6 +802,11 @@
802#define SQ_ALU_CONST_CACHE_LS_14 0x28f78 802#define SQ_ALU_CONST_CACHE_LS_14 0x28f78
803#define SQ_ALU_CONST_CACHE_LS_15 0x28f7c 803#define SQ_ALU_CONST_CACHE_LS_15 0x28f7c
804 804
805#define PA_SC_SCREEN_SCISSOR_TL 0x28030
806#define PA_SC_GENERIC_SCISSOR_TL 0x28240
807#define PA_SC_WINDOW_SCISSOR_TL 0x28204
808#define VGT_PRIMITIVE_TYPE 0x8958
809
805#define DB_DEPTH_CONTROL 0x28800 810#define DB_DEPTH_CONTROL 0x28800
806#define DB_DEPTH_VIEW 0x28008 811#define DB_DEPTH_VIEW 0x28008
807#define DB_HTILE_DATA_BASE 0x28014 812#define DB_HTILE_DATA_BASE 0x28014
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 90394df63009..2a4747d9747c 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -565,7 +565,7 @@ void r600_blit_fini(struct radeon_device *rdev)
565 radeon_bo_unref(&rdev->r600_blit.shader_obj); 565 radeon_bo_unref(&rdev->r600_blit.shader_obj);
566} 566}
567 567
568int r600_vb_ib_get(struct radeon_device *rdev) 568static int r600_vb_ib_get(struct radeon_device *rdev)
569{ 569{
570 int r; 570 int r;
571 r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib); 571 r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
@@ -579,7 +579,7 @@ int r600_vb_ib_get(struct radeon_device *rdev)
579 return 0; 579 return 0;
580} 580}
581 581
582void r600_vb_ib_put(struct radeon_device *rdev) 582static void r600_vb_ib_put(struct radeon_device *rdev)
583{ 583{
584 radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence); 584 radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
585 radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); 585 radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
@@ -683,17 +683,6 @@ void r600_kms_blit_copy(struct radeon_device *rdev,
683 683
684 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { 684 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
685 WARN_ON(1); 685 WARN_ON(1);
686
687#if 0
688 r600_vb_ib_put(rdev);
689
690 r600_nomm_put_vb(dev);
691 r600_nomm_get_vb(dev);
692 if (!dev_priv->blit_vb)
693 return;
694 set_shaders(dev);
695 vb = r600_nomm_get_vb_ptr(dev);
696#endif
697 } 686 }
698 687
699 vb[0] = i2f(dst_x); 688 vb[0] = i2f(dst_x);
@@ -778,17 +767,6 @@ void r600_kms_blit_copy(struct radeon_device *rdev,
778 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { 767 if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
779 WARN_ON(1); 768 WARN_ON(1);
780 } 769 }
781#if 0
782 if ((rdev->blit_vb->used + 48) > rdev->blit_vb->total) {
783 r600_nomm_put_vb(dev);
784 r600_nomm_get_vb(dev);
785 if (!rdev->blit_vb)
786 return;
787
788 set_shaders(dev);
789 vb = r600_nomm_get_vb_ptr(dev);
790 }
791#endif
792 770
793 vb[0] = i2f(dst_x / 4); 771 vb[0] = i2f(dst_x / 4);
794 vb[1] = 0; 772 vb[1] = 0;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8e5cb2c4fa7e..2edd52ece226 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1130,6 +1130,12 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
1130void r600_kms_blit_copy(struct radeon_device *rdev, 1130void r600_kms_blit_copy(struct radeon_device *rdev,
1131 u64 src_gpu_addr, u64 dst_gpu_addr, 1131 u64 src_gpu_addr, u64 dst_gpu_addr,
1132 int size_bytes); 1132 int size_bytes);
1133/* evergreen blit */
1134int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes);
1135void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence);
1136void evergreen_kms_blit_copy(struct radeon_device *rdev,
1137 u64 src_gpu_addr, u64 dst_gpu_addr,
1138 int size_bytes);
1133 1139
1134static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) 1140static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
1135{ 1141{
@@ -1471,6 +1477,8 @@ extern void r700_cp_stop(struct radeon_device *rdev);
1471extern void r700_cp_fini(struct radeon_device *rdev); 1477extern void r700_cp_fini(struct radeon_device *rdev);
1472extern void evergreen_disable_interrupt_state(struct radeon_device *rdev); 1478extern void evergreen_disable_interrupt_state(struct radeon_device *rdev);
1473extern int evergreen_irq_set(struct radeon_device *rdev); 1479extern int evergreen_irq_set(struct radeon_device *rdev);
1480extern int evergreen_blit_init(struct radeon_device *rdev);
1481extern void evergreen_blit_fini(struct radeon_device *rdev);
1474 1482
1475/* radeon_acpi.c */ 1483/* radeon_acpi.c */
1476#if defined(CONFIG_ACPI) 1484#if defined(CONFIG_ACPI)
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 25e1dd197791..64fb89ecbf74 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -726,9 +726,9 @@ static struct radeon_asic evergreen_asic = {
726 .get_vblank_counter = &evergreen_get_vblank_counter, 726 .get_vblank_counter = &evergreen_get_vblank_counter,
727 .fence_ring_emit = &r600_fence_ring_emit, 727 .fence_ring_emit = &r600_fence_ring_emit,
728 .cs_parse = &evergreen_cs_parse, 728 .cs_parse = &evergreen_cs_parse,
729 .copy_blit = NULL, 729 .copy_blit = &evergreen_copy_blit,
730 .copy_dma = NULL, 730 .copy_dma = &evergreen_copy_blit,
731 .copy = NULL, 731 .copy = &evergreen_copy_blit,
732 .get_engine_clock = &radeon_atom_get_engine_clock, 732 .get_engine_clock = &radeon_atom_get_engine_clock,
733 .set_engine_clock = &radeon_atom_set_engine_clock, 733 .set_engine_clock = &radeon_atom_set_engine_clock,
734 .get_memory_clock = &radeon_atom_get_memory_clock, 734 .get_memory_clock = &radeon_atom_get_memory_clock,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 6d3b055c02fd..740988244143 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -254,11 +254,6 @@ void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
254int r600_cs_parse(struct radeon_cs_parser *p); 254int r600_cs_parse(struct radeon_cs_parser *p);
255void r600_fence_ring_emit(struct radeon_device *rdev, 255void r600_fence_ring_emit(struct radeon_device *rdev,
256 struct radeon_fence *fence); 256 struct radeon_fence *fence);
257int r600_copy_dma(struct radeon_device *rdev,
258 uint64_t src_offset,
259 uint64_t dst_offset,
260 unsigned num_pages,
261 struct radeon_fence *fence);
262int r600_irq_process(struct radeon_device *rdev); 257int r600_irq_process(struct radeon_device *rdev);
263int r600_irq_set(struct radeon_device *rdev); 258int r600_irq_set(struct radeon_device *rdev);
264bool r600_gpu_is_lockup(struct radeon_device *rdev); 259bool r600_gpu_is_lockup(struct radeon_device *rdev);
@@ -304,6 +299,9 @@ int evergreen_resume(struct radeon_device *rdev);
304bool evergreen_gpu_is_lockup(struct radeon_device *rdev); 299bool evergreen_gpu_is_lockup(struct radeon_device *rdev);
305int evergreen_asic_reset(struct radeon_device *rdev); 300int evergreen_asic_reset(struct radeon_device *rdev);
306void evergreen_bandwidth_update(struct radeon_device *rdev); 301void evergreen_bandwidth_update(struct radeon_device *rdev);
302int evergreen_copy_blit(struct radeon_device *rdev,
303 uint64_t src_offset, uint64_t dst_offset,
304 unsigned num_pages, struct radeon_fence *fence);
307void evergreen_hpd_init(struct radeon_device *rdev); 305void evergreen_hpd_init(struct radeon_device *rdev);
308void evergreen_hpd_fini(struct radeon_device *rdev); 306void evergreen_hpd_fini(struct radeon_device *rdev);
309bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); 307bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);