about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/drm/radeon/cayman_blit_shaders.c | 326
-rw-r--r-- drivers/gpu/drm/radeon/cayman_blit_shaders.h | 3
-rw-r--r-- drivers/gpu/drm/radeon/evergreen_blit_kms.c | 505
-rw-r--r-- drivers/gpu/drm/radeon/ni.c | 13
-rw-r--r-- drivers/gpu/drm/radeon/radeon_asic.c | 6
5 files changed, 598 insertions, 255 deletions
diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.c b/drivers/gpu/drm/radeon/cayman_blit_shaders.c
index e148ab04b80b..7b4eeb7b4a8c 100644
--- a/drivers/gpu/drm/radeon/cayman_blit_shaders.c
+++ b/drivers/gpu/drm/radeon/cayman_blit_shaders.c
@@ -39,17 +39,335 @@
39 39
40const u32 cayman_default_state[] = 40const u32 cayman_default_state[] =
41{ 41{
42 /* XXX fill in additional blit state */ 42 0xc0066900,
43 0x00000000,
44 0x00000060, /* DB_RENDER_CONTROL */
45 0x00000000, /* DB_COUNT_CONTROL */
46 0x00000000, /* DB_DEPTH_VIEW */
47 0x0000002a, /* DB_RENDER_OVERRIDE */
48 0x00000000, /* DB_RENDER_OVERRIDE2 */
49 0x00000000, /* DB_HTILE_DATA_BASE */
43 50
44 0xc0026900, 51 0xc0026900,
45 0x00000316, 52 0x0000000a,
46 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ 53 0x00000000, /* DB_STENCIL_CLEAR */
47 0x00000010, /* */ 54 0x00000000, /* DB_DEPTH_CLEAR */
55
56 0xc0036900,
57 0x0000000f,
58 0x00000000, /* DB_DEPTH_INFO */
59 0x00000000, /* DB_Z_INFO */
60 0x00000000, /* DB_STENCIL_INFO */
61
62 0xc0016900,
63 0x00000080,
64 0x00000000, /* PA_SC_WINDOW_OFFSET */
65
66 0xc00d6900,
67 0x00000083,
68 0x0000ffff, /* PA_SC_CLIPRECT_RULE */
69 0x00000000, /* PA_SC_CLIPRECT_0_TL */
70 0x20002000, /* PA_SC_CLIPRECT_0_BR */
71 0x00000000,
72 0x20002000,
73 0x00000000,
74 0x20002000,
75 0x00000000,
76 0x20002000,
77 0xaaaaaaaa, /* PA_SC_EDGERULE */
78 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */
79 0x0000000f, /* CB_TARGET_MASK */
80 0x0000000f, /* CB_SHADER_MASK */
81
82 0xc0226900,
83 0x00000094,
84 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */
85 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */
86 0x80000000,
87 0x20002000,
88 0x80000000,
89 0x20002000,
90 0x80000000,
91 0x20002000,
92 0x80000000,
93 0x20002000,
94 0x80000000,
95 0x20002000,
96 0x80000000,
97 0x20002000,
98 0x80000000,
99 0x20002000,
100 0x80000000,
101 0x20002000,
102 0x80000000,
103 0x20002000,
104 0x80000000,
105 0x20002000,
106 0x80000000,
107 0x20002000,
108 0x80000000,
109 0x20002000,
110 0x80000000,
111 0x20002000,
112 0x80000000,
113 0x20002000,
114 0x80000000,
115 0x20002000,
116 0x00000000, /* PA_SC_VPORT_ZMIN_0 */
117 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */
118
119 0xc0016900,
120 0x000000d4,
121 0x00000000, /* SX_MISC */
48 122
49 0xc0026900, 123 0xc0026900,
50 0x000000d9, 124 0x000000d9,
51 0x00000000, /* CP_RINGID */ 125 0x00000000, /* CP_RINGID */
52 0x00000000, /* CP_VMID */ 126 0x00000000, /* CP_VMID */
127
128 0xc0096900,
129 0x00000100,
130 0x00ffffff, /* VGT_MAX_VTX_INDX */
131 0x00000000, /* VGT_MIN_VTX_INDX */
132 0x00000000, /* VGT_INDX_OFFSET */
133 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */
134 0x00000000, /* SX_ALPHA_TEST_CONTROL */
135 0x00000000, /* CB_BLEND_RED */
136 0x00000000, /* CB_BLEND_GREEN */
137 0x00000000, /* CB_BLEND_BLUE */
138 0x00000000, /* CB_BLEND_ALPHA */
139
140 0xc0016900,
141 0x00000187,
142 0x00000100, /* SPI_VS_OUT_ID_0 */
143
144 0xc0026900,
145 0x00000191,
146 0x00000100, /* SPI_PS_INPUT_CNTL_0 */
147 0x00000101, /* SPI_PS_INPUT_CNTL_1 */
148
149 0xc0016900,
150 0x000001b1,
151 0x00000000, /* SPI_VS_OUT_CONFIG */
152
153 0xc0106900,
154 0x000001b3,
155 0x20000001, /* SPI_PS_IN_CONTROL_0 */
156 0x00000000, /* SPI_PS_IN_CONTROL_1 */
157 0x00000000, /* SPI_INTERP_CONTROL_0 */
158 0x00000000, /* SPI_INPUT_Z */
159 0x00000000, /* SPI_FOG_CNTL */
160 0x00100000, /* SPI_BARYC_CNTL */
161 0x00000000, /* SPI_PS_IN_CONTROL_2 */
162 0x00000000, /* SPI_COMPUTE_INPUT_CNTL */
163 0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */
164 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */
165 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */
166 0x00000000, /* SPI_GPR_MGMT */
167 0x00000000, /* SPI_LDS_MGMT */
168 0x00000000, /* SPI_STACK_MGMT */
169 0x00000000, /* SPI_WAVE_MGMT_1 */
170 0x00000000, /* SPI_WAVE_MGMT_2 */
171
172 0xc0016900,
173 0x000001e0,
174 0x00000000, /* CB_BLEND0_CONTROL */
175
176 0xc00e6900,
177 0x00000200,
178 0x00000000, /* DB_DEPTH_CONTROL */
179 0x00000000, /* DB_EQAA */
180 0x00cc0010, /* CB_COLOR_CONTROL */
181 0x00000210, /* DB_SHADER_CONTROL */
182 0x00010000, /* PA_CL_CLIP_CNTL */
183 0x00000004, /* PA_SU_SC_MODE_CNTL */
184 0x00000100, /* PA_CL_VTE_CNTL */
185 0x00000000, /* PA_CL_VS_OUT_CNTL */
186 0x00000000, /* PA_CL_NANINF_CNTL */
187 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */
188 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */
189 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */
190 0x00000000, /* */
191 0x00000000, /* */
192
193 0xc0026900,
194 0x00000229,
195 0x00000000, /* SQ_PGM_START_FS */
196 0x00000000,
197
198 0xc0016900,
199 0x0000023b,
200 0x00000000, /* SQ_LDS_ALLOC_PS */
201
202 0xc0066900,
203 0x00000240,
204 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */
205 0x00000000,
206 0x00000000,
207 0x00000000,
208 0x00000000,
209 0x00000000,
210
211 0xc0046900,
212 0x00000247,
213 0x00000000, /* SQ_GS_VERT_ITEMSIZE */
214 0x00000000,
215 0x00000000,
216 0x00000000,
217
218 0xc0116900,
219 0x00000280,
220 0x00000000, /* PA_SU_POINT_SIZE */
221 0x00000000, /* PA_SU_POINT_MINMAX */
222 0x00000008, /* PA_SU_LINE_CNTL */
223 0x00000000, /* PA_SC_LINE_STIPPLE */
224 0x00000000, /* VGT_OUTPUT_PATH_CNTL */
225 0x00000000, /* VGT_HOS_CNTL */
226 0x00000000,
227 0x00000000,
228 0x00000000,
229 0x00000000,
230 0x00000000,
231 0x00000000,
232 0x00000000,
233 0x00000000,
234 0x00000000,
235 0x00000000,
236 0x00000000, /* VGT_GS_MODE */
237
238 0xc0026900,
239 0x00000292,
240 0x00000000, /* PA_SC_MODE_CNTL_0 */
241 0x00000000, /* PA_SC_MODE_CNTL_1 */
242
243 0xc0016900,
244 0x000002a1,
245 0x00000000, /* VGT_PRIMITIVEID_EN */
246
247 0xc0016900,
248 0x000002a5,
249 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */
250
251 0xc0026900,
252 0x000002a8,
253 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */
254 0x00000000,
255
256 0xc0026900,
257 0x000002ad,
258 0x00000000, /* VGT_REUSE_OFF */
259 0x00000000,
260
261 0xc0016900,
262 0x000002d5,
263 0x00000000, /* VGT_SHADER_STAGES_EN */
264
265 0xc0016900,
266 0x000002dc,
267 0x0000aa00, /* DB_ALPHA_TO_MASK */
268
269 0xc0066900,
270 0x000002de,
271 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */
272 0x00000000,
273 0x00000000,
274 0x00000000,
275 0x00000000,
276 0x00000000,
277
278 0xc0026900,
279 0x000002e5,
280 0x00000000, /* VGT_STRMOUT_CONFIG */
281 0x00000000,
282
283 0xc01b6900,
284 0x000002f5,
285 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */
286 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */
287 0x00000000, /* PA_SC_LINE_CNTL */
288 0x00000000, /* PA_SC_AA_CONFIG */
289 0x00000005, /* PA_SU_VTX_CNTL */
290 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */
291 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */
292 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */
293 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */
294 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
295 0x00000000,
296 0x00000000,
297 0x00000000,
298 0x00000000,
299 0x00000000,
300 0x00000000,
301 0x00000000,
302 0x00000000,
303 0x00000000,
304 0x00000000,
305 0x00000000,
306 0x00000000,
307 0x00000000,
308 0x00000000,
309 0x00000000,
310 0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */
311 0xffffffff,
312
313 0xc0026900,
314 0x00000316,
315 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
316 0x00000010, /* */
317};
318
319const u32 cayman_vs[] =
320{
321 0x00000004,
322 0x80400400,
323 0x0000a03c,
324 0x95000688,
325 0x00004000,
326 0x15000688,
327 0x00000000,
328 0x88000000,
329 0x04000000,
330 0x67961001,
331#ifdef __BIG_ENDIAN
332 0x00020000,
333#else
334 0x00000000,
335#endif
336 0x00000000,
337 0x04000000,
338 0x67961000,
339#ifdef __BIG_ENDIAN
340 0x00020008,
341#else
342 0x00000008,
343#endif
344 0x00000000,
345};
346
347const u32 cayman_ps[] =
348{
349 0x00000004,
350 0xa00c0000,
351 0x00000008,
352 0x80400000,
353 0x00000000,
354 0x95000688,
355 0x00000000,
356 0x88000000,
357 0x00380400,
358 0x00146b10,
359 0x00380000,
360 0x20146b10,
361 0x00380400,
362 0x40146b00,
363 0x80380000,
364 0x60146b00,
365 0x00000010,
366 0x000d1000,
367 0xb0800000,
368 0x00000000,
53}; 369};
54 370
371const u32 cayman_ps_size = ARRAY_SIZE(cayman_ps);
372const u32 cayman_vs_size = ARRAY_SIZE(cayman_vs);
55const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state); 373const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state);
diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.h b/drivers/gpu/drm/radeon/cayman_blit_shaders.h
index 33b75e5d0fa4..f5d0e9a60267 100644
--- a/drivers/gpu/drm/radeon/cayman_blit_shaders.h
+++ b/drivers/gpu/drm/radeon/cayman_blit_shaders.h
@@ -25,8 +25,11 @@
25#ifndef CAYMAN_BLIT_SHADERS_H 25#ifndef CAYMAN_BLIT_SHADERS_H
26#define CAYMAN_BLIT_SHADERS_H 26#define CAYMAN_BLIT_SHADERS_H
27 27
28extern const u32 cayman_ps[];
29extern const u32 cayman_vs[];
28extern const u32 cayman_default_state[]; 30extern const u32 cayman_default_state[];
29 31
32extern const u32 cayman_ps_size, cayman_vs_size;
30extern const u32 cayman_default_size; 33extern const u32 cayman_default_size;
31 34
32#endif 35#endif
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 40867290863c..a60ad28b0389 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -31,6 +31,7 @@
31 31
32#include "evergreend.h" 32#include "evergreend.h"
33#include "evergreen_blit_shaders.h" 33#include "evergreen_blit_shaders.h"
34#include "cayman_blit_shaders.h"
34 35
35#define DI_PT_RECTLIST 0x11 36#define DI_PT_RECTLIST 0x11
36#define DI_INDEX_SIZE_16_BIT 0x0 37#define DI_INDEX_SIZE_16_BIT 0x0
@@ -265,238 +266,240 @@ set_default_state(struct radeon_device *rdev)
265 u64 gpu_addr; 266 u64 gpu_addr;
266 int dwords; 267 int dwords;
267 268
268 switch (rdev->family) {
269 case CHIP_CEDAR:
270 default:
271 num_ps_gprs = 93;
272 num_vs_gprs = 46;
273 num_temp_gprs = 4;
274 num_gs_gprs = 31;
275 num_es_gprs = 31;
276 num_hs_gprs = 23;
277 num_ls_gprs = 23;
278 num_ps_threads = 96;
279 num_vs_threads = 16;
280 num_gs_threads = 16;
281 num_es_threads = 16;
282 num_hs_threads = 16;
283 num_ls_threads = 16;
284 num_ps_stack_entries = 42;
285 num_vs_stack_entries = 42;
286 num_gs_stack_entries = 42;
287 num_es_stack_entries = 42;
288 num_hs_stack_entries = 42;
289 num_ls_stack_entries = 42;
290 break;
291 case CHIP_REDWOOD:
292 num_ps_gprs = 93;
293 num_vs_gprs = 46;
294 num_temp_gprs = 4;
295 num_gs_gprs = 31;
296 num_es_gprs = 31;
297 num_hs_gprs = 23;
298 num_ls_gprs = 23;
299 num_ps_threads = 128;
300 num_vs_threads = 20;
301 num_gs_threads = 20;
302 num_es_threads = 20;
303 num_hs_threads = 20;
304 num_ls_threads = 20;
305 num_ps_stack_entries = 42;
306 num_vs_stack_entries = 42;
307 num_gs_stack_entries = 42;
308 num_es_stack_entries = 42;
309 num_hs_stack_entries = 42;
310 num_ls_stack_entries = 42;
311 break;
312 case CHIP_JUNIPER:
313 num_ps_gprs = 93;
314 num_vs_gprs = 46;
315 num_temp_gprs = 4;
316 num_gs_gprs = 31;
317 num_es_gprs = 31;
318 num_hs_gprs = 23;
319 num_ls_gprs = 23;
320 num_ps_threads = 128;
321 num_vs_threads = 20;
322 num_gs_threads = 20;
323 num_es_threads = 20;
324 num_hs_threads = 20;
325 num_ls_threads = 20;
326 num_ps_stack_entries = 85;
327 num_vs_stack_entries = 85;
328 num_gs_stack_entries = 85;
329 num_es_stack_entries = 85;
330 num_hs_stack_entries = 85;
331 num_ls_stack_entries = 85;
332 break;
333 case CHIP_CYPRESS:
334 case CHIP_HEMLOCK:
335 num_ps_gprs = 93;
336 num_vs_gprs = 46;
337 num_temp_gprs = 4;
338 num_gs_gprs = 31;
339 num_es_gprs = 31;
340 num_hs_gprs = 23;
341 num_ls_gprs = 23;
342 num_ps_threads = 128;
343 num_vs_threads = 20;
344 num_gs_threads = 20;
345 num_es_threads = 20;
346 num_hs_threads = 20;
347 num_ls_threads = 20;
348 num_ps_stack_entries = 85;
349 num_vs_stack_entries = 85;
350 num_gs_stack_entries = 85;
351 num_es_stack_entries = 85;
352 num_hs_stack_entries = 85;
353 num_ls_stack_entries = 85;
354 break;
355 case CHIP_PALM:
356 num_ps_gprs = 93;
357 num_vs_gprs = 46;
358 num_temp_gprs = 4;
359 num_gs_gprs = 31;
360 num_es_gprs = 31;
361 num_hs_gprs = 23;
362 num_ls_gprs = 23;
363 num_ps_threads = 96;
364 num_vs_threads = 16;
365 num_gs_threads = 16;
366 num_es_threads = 16;
367 num_hs_threads = 16;
368 num_ls_threads = 16;
369 num_ps_stack_entries = 42;
370 num_vs_stack_entries = 42;
371 num_gs_stack_entries = 42;
372 num_es_stack_entries = 42;
373 num_hs_stack_entries = 42;
374 num_ls_stack_entries = 42;
375 break;
376 case CHIP_BARTS:
377 num_ps_gprs = 93;
378 num_vs_gprs = 46;
379 num_temp_gprs = 4;
380 num_gs_gprs = 31;
381 num_es_gprs = 31;
382 num_hs_gprs = 23;
383 num_ls_gprs = 23;
384 num_ps_threads = 128;
385 num_vs_threads = 20;
386 num_gs_threads = 20;
387 num_es_threads = 20;
388 num_hs_threads = 20;
389 num_ls_threads = 20;
390 num_ps_stack_entries = 85;
391 num_vs_stack_entries = 85;
392 num_gs_stack_entries = 85;
393 num_es_stack_entries = 85;
394 num_hs_stack_entries = 85;
395 num_ls_stack_entries = 85;
396 break;
397 case CHIP_TURKS:
398 num_ps_gprs = 93;
399 num_vs_gprs = 46;
400 num_temp_gprs = 4;
401 num_gs_gprs = 31;
402 num_es_gprs = 31;
403 num_hs_gprs = 23;
404 num_ls_gprs = 23;
405 num_ps_threads = 128;
406 num_vs_threads = 20;
407 num_gs_threads = 20;
408 num_es_threads = 20;
409 num_hs_threads = 20;
410 num_ls_threads = 20;
411 num_ps_stack_entries = 42;
412 num_vs_stack_entries = 42;
413 num_gs_stack_entries = 42;
414 num_es_stack_entries = 42;
415 num_hs_stack_entries = 42;
416 num_ls_stack_entries = 42;
417 break;
418 case CHIP_CAICOS:
419 num_ps_gprs = 93;
420 num_vs_gprs = 46;
421 num_temp_gprs = 4;
422 num_gs_gprs = 31;
423 num_es_gprs = 31;
424 num_hs_gprs = 23;
425 num_ls_gprs = 23;
426 num_ps_threads = 128;
427 num_vs_threads = 10;
428 num_gs_threads = 10;
429 num_es_threads = 10;
430 num_hs_threads = 10;
431 num_ls_threads = 10;
432 num_ps_stack_entries = 42;
433 num_vs_stack_entries = 42;
434 num_gs_stack_entries = 42;
435 num_es_stack_entries = 42;
436 num_hs_stack_entries = 42;
437 num_ls_stack_entries = 42;
438 break;
439 }
440
441 if ((rdev->family == CHIP_CEDAR) ||
442 (rdev->family == CHIP_PALM) ||
443 (rdev->family == CHIP_CAICOS))
444 sq_config = 0;
445 else
446 sq_config = VC_ENABLE;
447
448 sq_config |= (EXPORT_SRC_C |
449 CS_PRIO(0) |
450 LS_PRIO(0) |
451 HS_PRIO(0) |
452 PS_PRIO(0) |
453 VS_PRIO(1) |
454 GS_PRIO(2) |
455 ES_PRIO(3));
456
457 sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
458 NUM_VS_GPRS(num_vs_gprs) |
459 NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
460 sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
461 NUM_ES_GPRS(num_es_gprs));
462 sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
463 NUM_LS_GPRS(num_ls_gprs));
464 sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
465 NUM_VS_THREADS(num_vs_threads) |
466 NUM_GS_THREADS(num_gs_threads) |
467 NUM_ES_THREADS(num_es_threads));
468 sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
469 NUM_LS_THREADS(num_ls_threads));
470 sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
471 NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
472 sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
473 NUM_ES_STACK_ENTRIES(num_es_stack_entries));
474 sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
475 NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
476
477 /* set clear context state */ 269 /* set clear context state */
478 radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0)); 270 radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
479 radeon_ring_write(rdev, 0); 271 radeon_ring_write(rdev, 0);
480 272
481 /* disable dyn gprs */ 273 if (rdev->family < CHIP_CAYMAN) {
482 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 274 switch (rdev->family) {
483 radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2); 275 case CHIP_CEDAR:
484 radeon_ring_write(rdev, 0); 276 default:
277 num_ps_gprs = 93;
278 num_vs_gprs = 46;
279 num_temp_gprs = 4;
280 num_gs_gprs = 31;
281 num_es_gprs = 31;
282 num_hs_gprs = 23;
283 num_ls_gprs = 23;
284 num_ps_threads = 96;
285 num_vs_threads = 16;
286 num_gs_threads = 16;
287 num_es_threads = 16;
288 num_hs_threads = 16;
289 num_ls_threads = 16;
290 num_ps_stack_entries = 42;
291 num_vs_stack_entries = 42;
292 num_gs_stack_entries = 42;
293 num_es_stack_entries = 42;
294 num_hs_stack_entries = 42;
295 num_ls_stack_entries = 42;
296 break;
297 case CHIP_REDWOOD:
298 num_ps_gprs = 93;
299 num_vs_gprs = 46;
300 num_temp_gprs = 4;
301 num_gs_gprs = 31;
302 num_es_gprs = 31;
303 num_hs_gprs = 23;
304 num_ls_gprs = 23;
305 num_ps_threads = 128;
306 num_vs_threads = 20;
307 num_gs_threads = 20;
308 num_es_threads = 20;
309 num_hs_threads = 20;
310 num_ls_threads = 20;
311 num_ps_stack_entries = 42;
312 num_vs_stack_entries = 42;
313 num_gs_stack_entries = 42;
314 num_es_stack_entries = 42;
315 num_hs_stack_entries = 42;
316 num_ls_stack_entries = 42;
317 break;
318 case CHIP_JUNIPER:
319 num_ps_gprs = 93;
320 num_vs_gprs = 46;
321 num_temp_gprs = 4;
322 num_gs_gprs = 31;
323 num_es_gprs = 31;
324 num_hs_gprs = 23;
325 num_ls_gprs = 23;
326 num_ps_threads = 128;
327 num_vs_threads = 20;
328 num_gs_threads = 20;
329 num_es_threads = 20;
330 num_hs_threads = 20;
331 num_ls_threads = 20;
332 num_ps_stack_entries = 85;
333 num_vs_stack_entries = 85;
334 num_gs_stack_entries = 85;
335 num_es_stack_entries = 85;
336 num_hs_stack_entries = 85;
337 num_ls_stack_entries = 85;
338 break;
339 case CHIP_CYPRESS:
340 case CHIP_HEMLOCK:
341 num_ps_gprs = 93;
342 num_vs_gprs = 46;
343 num_temp_gprs = 4;
344 num_gs_gprs = 31;
345 num_es_gprs = 31;
346 num_hs_gprs = 23;
347 num_ls_gprs = 23;
348 num_ps_threads = 128;
349 num_vs_threads = 20;
350 num_gs_threads = 20;
351 num_es_threads = 20;
352 num_hs_threads = 20;
353 num_ls_threads = 20;
354 num_ps_stack_entries = 85;
355 num_vs_stack_entries = 85;
356 num_gs_stack_entries = 85;
357 num_es_stack_entries = 85;
358 num_hs_stack_entries = 85;
359 num_ls_stack_entries = 85;
360 break;
361 case CHIP_PALM:
362 num_ps_gprs = 93;
363 num_vs_gprs = 46;
364 num_temp_gprs = 4;
365 num_gs_gprs = 31;
366 num_es_gprs = 31;
367 num_hs_gprs = 23;
368 num_ls_gprs = 23;
369 num_ps_threads = 96;
370 num_vs_threads = 16;
371 num_gs_threads = 16;
372 num_es_threads = 16;
373 num_hs_threads = 16;
374 num_ls_threads = 16;
375 num_ps_stack_entries = 42;
376 num_vs_stack_entries = 42;
377 num_gs_stack_entries = 42;
378 num_es_stack_entries = 42;
379 num_hs_stack_entries = 42;
380 num_ls_stack_entries = 42;
381 break;
382 case CHIP_BARTS:
383 num_ps_gprs = 93;
384 num_vs_gprs = 46;
385 num_temp_gprs = 4;
386 num_gs_gprs = 31;
387 num_es_gprs = 31;
388 num_hs_gprs = 23;
389 num_ls_gprs = 23;
390 num_ps_threads = 128;
391 num_vs_threads = 20;
392 num_gs_threads = 20;
393 num_es_threads = 20;
394 num_hs_threads = 20;
395 num_ls_threads = 20;
396 num_ps_stack_entries = 85;
397 num_vs_stack_entries = 85;
398 num_gs_stack_entries = 85;
399 num_es_stack_entries = 85;
400 num_hs_stack_entries = 85;
401 num_ls_stack_entries = 85;
402 break;
403 case CHIP_TURKS:
404 num_ps_gprs = 93;
405 num_vs_gprs = 46;
406 num_temp_gprs = 4;
407 num_gs_gprs = 31;
408 num_es_gprs = 31;
409 num_hs_gprs = 23;
410 num_ls_gprs = 23;
411 num_ps_threads = 128;
412 num_vs_threads = 20;
413 num_gs_threads = 20;
414 num_es_threads = 20;
415 num_hs_threads = 20;
416 num_ls_threads = 20;
417 num_ps_stack_entries = 42;
418 num_vs_stack_entries = 42;
419 num_gs_stack_entries = 42;
420 num_es_stack_entries = 42;
421 num_hs_stack_entries = 42;
422 num_ls_stack_entries = 42;
423 break;
424 case CHIP_CAICOS:
425 num_ps_gprs = 93;
426 num_vs_gprs = 46;
427 num_temp_gprs = 4;
428 num_gs_gprs = 31;
429 num_es_gprs = 31;
430 num_hs_gprs = 23;
431 num_ls_gprs = 23;
432 num_ps_threads = 128;
433 num_vs_threads = 10;
434 num_gs_threads = 10;
435 num_es_threads = 10;
436 num_hs_threads = 10;
437 num_ls_threads = 10;
438 num_ps_stack_entries = 42;
439 num_vs_stack_entries = 42;
440 num_gs_stack_entries = 42;
441 num_es_stack_entries = 42;
442 num_hs_stack_entries = 42;
443 num_ls_stack_entries = 42;
444 break;
445 }
485 446
486 /* SQ config */ 447 if ((rdev->family == CHIP_CEDAR) ||
487 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11)); 448 (rdev->family == CHIP_PALM) ||
488 radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2); 449 (rdev->family == CHIP_CAICOS))
489 radeon_ring_write(rdev, sq_config); 450 sq_config = 0;
490 radeon_ring_write(rdev, sq_gpr_resource_mgmt_1); 451 else
491 radeon_ring_write(rdev, sq_gpr_resource_mgmt_2); 452 sq_config = VC_ENABLE;
492 radeon_ring_write(rdev, sq_gpr_resource_mgmt_3); 453
493 radeon_ring_write(rdev, 0); 454 sq_config |= (EXPORT_SRC_C |
494 radeon_ring_write(rdev, 0); 455 CS_PRIO(0) |
495 radeon_ring_write(rdev, sq_thread_resource_mgmt); 456 LS_PRIO(0) |
496 radeon_ring_write(rdev, sq_thread_resource_mgmt_2); 457 HS_PRIO(0) |
497 radeon_ring_write(rdev, sq_stack_resource_mgmt_1); 458 PS_PRIO(0) |
498 radeon_ring_write(rdev, sq_stack_resource_mgmt_2); 459 VS_PRIO(1) |
499 radeon_ring_write(rdev, sq_stack_resource_mgmt_3); 460 GS_PRIO(2) |
461 ES_PRIO(3));
462
463 sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
464 NUM_VS_GPRS(num_vs_gprs) |
465 NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
466 sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
467 NUM_ES_GPRS(num_es_gprs));
468 sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
469 NUM_LS_GPRS(num_ls_gprs));
470 sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
471 NUM_VS_THREADS(num_vs_threads) |
472 NUM_GS_THREADS(num_gs_threads) |
473 NUM_ES_THREADS(num_es_threads));
474 sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
475 NUM_LS_THREADS(num_ls_threads));
476 sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
477 NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
478 sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
479 NUM_ES_STACK_ENTRIES(num_es_stack_entries));
480 sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
481 NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
482
483 /* disable dyn gprs */
484 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
485 radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
486 radeon_ring_write(rdev, 0);
487
488 /* SQ config */
489 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
490 radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
491 radeon_ring_write(rdev, sq_config);
492 radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
493 radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
494 radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
495 radeon_ring_write(rdev, 0);
496 radeon_ring_write(rdev, 0);
497 radeon_ring_write(rdev, sq_thread_resource_mgmt);
498 radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
499 radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
500 radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
501 radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
502 }
500 503
501 /* CONTEXT_CONTROL */ 504 /* CONTEXT_CONTROL */
502 radeon_ring_write(rdev, 0xc0012800); 505 radeon_ring_write(rdev, 0xc0012800);
@@ -570,7 +573,10 @@ int evergreen_blit_init(struct radeon_device *rdev)
570 mutex_init(&rdev->r600_blit.mutex); 573 mutex_init(&rdev->r600_blit.mutex);
571 rdev->r600_blit.state_offset = 0; 574 rdev->r600_blit.state_offset = 0;
572 575
573 rdev->r600_blit.state_len = evergreen_default_size; 576 if (rdev->family < CHIP_CAYMAN)
577 rdev->r600_blit.state_len = evergreen_default_size;
578 else
579 rdev->r600_blit.state_len = cayman_default_size;
574 580
575 dwords = rdev->r600_blit.state_len; 581 dwords = rdev->r600_blit.state_len;
576 while (dwords & 0xf) { 582 while (dwords & 0xf) {
@@ -582,11 +588,17 @@ int evergreen_blit_init(struct radeon_device *rdev)
582 obj_size = ALIGN(obj_size, 256); 588 obj_size = ALIGN(obj_size, 256);
583 589
584 rdev->r600_blit.vs_offset = obj_size; 590 rdev->r600_blit.vs_offset = obj_size;
585 obj_size += evergreen_vs_size * 4; 591 if (rdev->family < CHIP_CAYMAN)
592 obj_size += evergreen_vs_size * 4;
593 else
594 obj_size += cayman_vs_size * 4;
586 obj_size = ALIGN(obj_size, 256); 595 obj_size = ALIGN(obj_size, 256);
587 596
588 rdev->r600_blit.ps_offset = obj_size; 597 rdev->r600_blit.ps_offset = obj_size;
589 obj_size += evergreen_ps_size * 4; 598 if (rdev->family < CHIP_CAYMAN)
599 obj_size += evergreen_ps_size * 4;
600 else
601 obj_size += cayman_ps_size * 4;
590 obj_size = ALIGN(obj_size, 256); 602 obj_size = ALIGN(obj_size, 256);
591 603
592 r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, 604 r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
@@ -609,16 +621,29 @@ int evergreen_blit_init(struct radeon_device *rdev)
609 return r; 621 return r;
610 } 622 }
611 623
612 memcpy_toio(ptr + rdev->r600_blit.state_offset, 624 if (rdev->family < CHIP_CAYMAN) {
613 evergreen_default_state, rdev->r600_blit.state_len * 4); 625 memcpy_toio(ptr + rdev->r600_blit.state_offset,
614 626 evergreen_default_state, rdev->r600_blit.state_len * 4);
615 if (num_packet2s) 627
616 memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), 628 if (num_packet2s)
617 packet2s, num_packet2s * 4); 629 memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
618 for (i = 0; i < evergreen_vs_size; i++) 630 packet2s, num_packet2s * 4);
619 *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]); 631 for (i = 0; i < evergreen_vs_size; i++)
620 for (i = 0; i < evergreen_ps_size; i++) 632 *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
621 *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]); 633 for (i = 0; i < evergreen_ps_size; i++)
634 *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
635 } else {
636 memcpy_toio(ptr + rdev->r600_blit.state_offset,
637 cayman_default_state, rdev->r600_blit.state_len * 4);
638
639 if (num_packet2s)
640 memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
641 packet2s, num_packet2s * 4);
642 for (i = 0; i < cayman_vs_size; i++)
643 *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]);
644 for (i = 0; i < cayman_ps_size; i++)
645 *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]);
646 }
622 radeon_bo_kunmap(rdev->r600_blit.shader_obj); 647 radeon_bo_kunmap(rdev->r600_blit.shader_obj);
623 radeon_bo_unreserve(rdev->r600_blit.shader_obj); 648 radeon_bo_unreserve(rdev->r600_blit.shader_obj);
624 649
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index a0cc7a5ff031..c023b0ad89f4 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1387,14 +1387,12 @@ static int cayman_startup(struct radeon_device *rdev)
1387 return r; 1387 return r;
1388 cayman_gpu_init(rdev); 1388 cayman_gpu_init(rdev);
1389 1389
1390#if 0 1390 r = evergreen_blit_init(rdev);
1391 r = cayman_blit_init(rdev);
1392 if (r) { 1391 if (r) {
1393 cayman_blit_fini(rdev); 1392 evergreen_blit_fini(rdev);
1394 rdev->asic->copy = NULL; 1393 rdev->asic->copy = NULL;
1395 dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); 1394 dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
1396 } 1395 }
1397#endif
1398 1396
1399 /* allocate wb buffer */ 1397 /* allocate wb buffer */
1400 r = radeon_wb_init(rdev); 1398 r = radeon_wb_init(rdev);
@@ -1452,7 +1450,7 @@ int cayman_resume(struct radeon_device *rdev)
1452 1450
1453int cayman_suspend(struct radeon_device *rdev) 1451int cayman_suspend(struct radeon_device *rdev)
1454{ 1452{
1455 /* int r; */ 1453 int r;
1456 1454
1457 /* FIXME: we should wait for ring to be empty */ 1455 /* FIXME: we should wait for ring to be empty */
1458 cayman_cp_enable(rdev, false); 1456 cayman_cp_enable(rdev, false);
@@ -1461,14 +1459,13 @@ int cayman_suspend(struct radeon_device *rdev)
1461 radeon_wb_disable(rdev); 1459 radeon_wb_disable(rdev);
1462 cayman_pcie_gart_disable(rdev); 1460 cayman_pcie_gart_disable(rdev);
1463 1461
1464#if 0
1465 /* unpin shaders bo */ 1462 /* unpin shaders bo */
1466 r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); 1463 r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
1467 if (likely(r == 0)) { 1464 if (likely(r == 0)) {
1468 radeon_bo_unpin(rdev->r600_blit.shader_obj); 1465 radeon_bo_unpin(rdev->r600_blit.shader_obj);
1469 radeon_bo_unreserve(rdev->r600_blit.shader_obj); 1466 radeon_bo_unreserve(rdev->r600_blit.shader_obj);
1470 } 1467 }
1471#endif 1468
1472 return 0; 1469 return 0;
1473} 1470}
1474 1471
@@ -1580,7 +1577,7 @@ int cayman_init(struct radeon_device *rdev)
1580 1577
1581void cayman_fini(struct radeon_device *rdev) 1578void cayman_fini(struct radeon_device *rdev)
1582{ 1579{
1583 /* cayman_blit_fini(rdev); */ 1580 evergreen_blit_fini(rdev);
1584 cayman_cp_fini(rdev); 1581 cayman_cp_fini(rdev);
1585 r600_irq_fini(rdev); 1582 r600_irq_fini(rdev);
1586 radeon_wb_fini(rdev); 1583 radeon_wb_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index d948265db87e..b9b3c2a2b119 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -906,9 +906,9 @@ static struct radeon_asic cayman_asic = {
906 .get_vblank_counter = &evergreen_get_vblank_counter, 906 .get_vblank_counter = &evergreen_get_vblank_counter,
907 .fence_ring_emit = &r600_fence_ring_emit, 907 .fence_ring_emit = &r600_fence_ring_emit,
908 .cs_parse = &evergreen_cs_parse, 908 .cs_parse = &evergreen_cs_parse,
909 .copy_blit = NULL, 909 .copy_blit = &evergreen_copy_blit,
910 .copy_dma = NULL, 910 .copy_dma = &evergreen_copy_blit,
911 .copy = NULL, 911 .copy = &evergreen_copy_blit,
912 .get_engine_clock = &radeon_atom_get_engine_clock, 912 .get_engine_clock = &radeon_atom_get_engine_clock,
913 .set_engine_clock = &radeon_atom_set_engine_clock, 913 .set_engine_clock = &radeon_atom_set_engine_clock,
914 .get_memory_clock = &radeon_atom_get_memory_clock, 914 .get_memory_clock = &radeon_atom_get_memory_clock,