aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alex Deucher <alexdeucher@gmail.com> 2010-10-21 13:31:38 -0400
committer Dave Airlie <airlied@redhat.com> 2010-10-26 00:42:39 -0400
commit 2281a378e1830d7ab78d3067f228e4e55d368b0d (patch)
tree 8fce4594a1d1b8854d219f5297ccb5ecfc263b10
parent c3cceeddf0b5f97b0d2352b98ef0f025e31a9ae3 (diff)
drm/radeon/kms/evergreen: set the clear state to the blit state
The hardware stores a default clear state for registers in the context range; this state can be initialized when the CP is set up. Set the blit state as the default clear state and use the CLEAR_STATE packet to load the blit state rather than loading it from an IB. This reduces overhead when doing bo moves using the 3D engine.

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r-- drivers/gpu/drm/radeon/evergreen.c 38
-rw-r--r-- drivers/gpu/drm/radeon/evergreen_blit_kms.c 60
-rw-r--r-- drivers/gpu/drm/radeon/evergreen_blit_shaders.c 19
-rw-r--r-- drivers/gpu/drm/radeon/evergreend.h 2
4 files changed, 65 insertions, 54 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 17b2fe925ce0..f12a5b3ec050 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -32,6 +32,7 @@
32#include "atom.h" 32#include "atom.h"
33#include "avivod.h" 33#include "avivod.h"
34#include "evergreen_reg.h" 34#include "evergreen_reg.h"
35#include "evergreen_blit_shaders.h"
35 36
36#define EVERGREEN_PFP_UCODE_SIZE 1120 37#define EVERGREEN_PFP_UCODE_SIZE 1120
37#define EVERGREEN_PM4_UCODE_SIZE 1376 38#define EVERGREEN_PM4_UCODE_SIZE 1376
@@ -1112,7 +1113,7 @@ static int evergreen_cp_load_microcode(struct radeon_device *rdev)
1112 1113
1113static int evergreen_cp_start(struct radeon_device *rdev) 1114static int evergreen_cp_start(struct radeon_device *rdev)
1114{ 1115{
1115 int r; 1116 int r, i;
1116 uint32_t cp_me; 1117 uint32_t cp_me;
1117 1118
1118 r = radeon_ring_lock(rdev, 7); 1119 r = radeon_ring_lock(rdev, 7);
@@ -1132,16 +1133,39 @@ static int evergreen_cp_start(struct radeon_device *rdev)
1132 cp_me = 0xff; 1133 cp_me = 0xff;
1133 WREG32(CP_ME_CNTL, cp_me); 1134 WREG32(CP_ME_CNTL, cp_me);
1134 1135
1135 r = radeon_ring_lock(rdev, 4); 1136 r = radeon_ring_lock(rdev, evergreen_default_size + 15);
1136 if (r) { 1137 if (r) {
1137 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); 1138 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1138 return r; 1139 return r;
1139 } 1140 }
1140 /* init some VGT regs */ 1141
1141 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 1142 /* setup clear context state */
1142 radeon_ring_write(rdev, (VGT_VERTEX_REUSE_BLOCK_CNTL - PACKET3_SET_CONTEXT_REG_START) >> 2); 1143 radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1143 radeon_ring_write(rdev, 0xe); 1144 radeon_ring_write(rdev, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1144 radeon_ring_write(rdev, 0x10); 1145
1146 for (i = 0; i < evergreen_default_size; i++)
1147 radeon_ring_write(rdev, evergreen_default_state[i]);
1148
1149 radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1150 radeon_ring_write(rdev, PACKET3_PREAMBLE_END_CLEAR_STATE);
1151
1152 /* set clear context state */
1153 radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
1154 radeon_ring_write(rdev, 0);
1155
1156 /* SQ_VTX_BASE_VTX_LOC */
1157 radeon_ring_write(rdev, 0xc0026f00);
1158 radeon_ring_write(rdev, 0x00000000);
1159 radeon_ring_write(rdev, 0x00000000);
1160 radeon_ring_write(rdev, 0x00000000);
1161
1162 /* Clear consts */
1163 radeon_ring_write(rdev, 0xc0036f00);
1164 radeon_ring_write(rdev, 0x00000bc4);
1165 radeon_ring_write(rdev, 0xffffffff);
1166 radeon_ring_write(rdev, 0xffffffff);
1167 radeon_ring_write(rdev, 0xffffffff);
1168
1145 radeon_ring_unlock_commit(rdev); 1169 radeon_ring_unlock_commit(rdev);
1146 1170
1147 return 0; 1171 return 0;
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index a9825aa324b4..086b9b0416c4 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -230,7 +230,7 @@ draw_auto(struct radeon_device *rdev)
230 230
231} 231}
232 232
233/* emits 20 */ 233/* emits 30 */
234static void 234static void
235set_default_state(struct radeon_device *rdev) 235set_default_state(struct radeon_device *rdev)
236{ 236{
@@ -243,8 +243,6 @@ set_default_state(struct radeon_device *rdev)
243 int num_hs_threads, num_ls_threads; 243 int num_hs_threads, num_ls_threads;
244 int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; 244 int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
245 int num_hs_stack_entries, num_ls_stack_entries; 245 int num_hs_stack_entries, num_ls_stack_entries;
246 u64 gpu_addr;
247 int dwords;
248 246
249 switch (rdev->family) { 247 switch (rdev->family) {
250 case CHIP_CEDAR: 248 case CHIP_CEDAR:
@@ -369,13 +367,9 @@ set_default_state(struct radeon_device *rdev)
369 sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) | 367 sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
370 NUM_LS_STACK_ENTRIES(num_ls_stack_entries)); 368 NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
371 369
372 /* emit an IB pointing at default state */ 370 /* set clear context state */
373 dwords = ALIGN(rdev->r600_blit.state_len, 0x10); 371 radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
374 gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; 372 radeon_ring_write(rdev, 0);
375 radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
376 radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC);
377 radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
378 radeon_ring_write(rdev, dwords);
379 373
380 /* disable dyn gprs */ 374 /* disable dyn gprs */
381 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 375 radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
@@ -396,6 +390,25 @@ set_default_state(struct radeon_device *rdev)
396 radeon_ring_write(rdev, sq_stack_resource_mgmt_1); 390 radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
397 radeon_ring_write(rdev, sq_stack_resource_mgmt_2); 391 radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
398 radeon_ring_write(rdev, sq_stack_resource_mgmt_3); 392 radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
393
394 /* CONTEXT_CONTROL */
395 radeon_ring_write(rdev, 0xc0012800);
396 radeon_ring_write(rdev, 0x80000000);
397 radeon_ring_write(rdev, 0x80000000);
398
399 /* SQ_VTX_BASE_VTX_LOC */
400 radeon_ring_write(rdev, 0xc0026f00);
401 radeon_ring_write(rdev, 0x00000000);
402 radeon_ring_write(rdev, 0x00000000);
403 radeon_ring_write(rdev, 0x00000000);
404
405 /* SET_SAMPLER */
406 radeon_ring_write(rdev, 0xc0036e00);
407 radeon_ring_write(rdev, 0x00000000);
408 radeon_ring_write(rdev, 0x00000012);
409 radeon_ring_write(rdev, 0x00000000);
410 radeon_ring_write(rdev, 0x00000000);
411
399} 412}
400 413
401static inline uint32_t i2f(uint32_t input) 414static inline uint32_t i2f(uint32_t input)
@@ -426,10 +439,8 @@ static inline uint32_t i2f(uint32_t input)
426int evergreen_blit_init(struct radeon_device *rdev) 439int evergreen_blit_init(struct radeon_device *rdev)
427{ 440{
428 u32 obj_size; 441 u32 obj_size;
429 int r, dwords; 442 int r;
430 void *ptr; 443 void *ptr;
431 u32 packet2s[16];
432 int num_packet2s = 0;
433 444
434 /* pin copy shader into vram if already initialized */ 445 /* pin copy shader into vram if already initialized */
435 if (rdev->r600_blit.shader_obj) 446 if (rdev->r600_blit.shader_obj)
@@ -437,17 +448,8 @@ int evergreen_blit_init(struct radeon_device *rdev)
437 448
438 mutex_init(&rdev->r600_blit.mutex); 449 mutex_init(&rdev->r600_blit.mutex);
439 rdev->r600_blit.state_offset = 0; 450 rdev->r600_blit.state_offset = 0;
440 451 rdev->r600_blit.state_len = 0;
441 rdev->r600_blit.state_len = evergreen_default_size; 452 obj_size = 0;
442
443 dwords = rdev->r600_blit.state_len;
444 while (dwords & 0xf) {
445 packet2s[num_packet2s++] = PACKET2(0);
446 dwords++;
447 }
448
449 obj_size = dwords * 4;
450 obj_size = ALIGN(obj_size, 256);
451 453
452 rdev->r600_blit.vs_offset = obj_size; 454 rdev->r600_blit.vs_offset = obj_size;
453 obj_size += evergreen_vs_size * 4; 455 obj_size += evergreen_vs_size * 4;
@@ -477,12 +479,6 @@ int evergreen_blit_init(struct radeon_device *rdev)
477 return r; 479 return r;
478 } 480 }
479 481
480 memcpy_toio(ptr + rdev->r600_blit.state_offset,
481 evergreen_default_state, rdev->r600_blit.state_len * 4);
482
483 if (num_packet2s)
484 memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
485 packet2s, num_packet2s * 4);
486 memcpy(ptr + rdev->r600_blit.vs_offset, evergreen_vs, evergreen_vs_size * 4); 482 memcpy(ptr + rdev->r600_blit.vs_offset, evergreen_vs, evergreen_vs_size * 4);
487 memcpy(ptr + rdev->r600_blit.ps_offset, evergreen_ps, evergreen_ps_size * 4); 483 memcpy(ptr + rdev->r600_blit.ps_offset, evergreen_ps, evergreen_ps_size * 4);
488 radeon_bo_kunmap(rdev->r600_blit.shader_obj); 484 radeon_bo_kunmap(rdev->r600_blit.shader_obj);
@@ -566,7 +562,7 @@ int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
566 /* calculate number of loops correctly */ 562 /* calculate number of loops correctly */
567 ring_size = num_loops * dwords_per_loop; 563 ring_size = num_loops * dwords_per_loop;
568 /* set default + shaders */ 564 /* set default + shaders */
569 ring_size += 36; /* shaders + def state */ 565 ring_size += 46; /* shaders + def state */
570 ring_size += 10; /* fence emit for VB IB */ 566 ring_size += 10; /* fence emit for VB IB */
571 ring_size += 5; /* done copy */ 567 ring_size += 5; /* done copy */
572 ring_size += 10; /* fence emit for done copy */ 568 ring_size += 10; /* fence emit for done copy */
@@ -574,7 +570,7 @@ int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
574 if (r) 570 if (r)
575 return r; 571 return r;
576 572
577 set_default_state(rdev); /* 20 */ 573 set_default_state(rdev); /* 30 */
578 set_shaders(rdev); /* 16 */ 574 set_shaders(rdev); /* 16 */
579 return 0; 575 return 0;
580} 576}
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_shaders.c b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c
index 5d5045027b46..ef1d28c07fbf 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_shaders.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c
@@ -39,10 +39,6 @@
39 39
40const u32 evergreen_default_state[] = 40const u32 evergreen_default_state[] =
41{ 41{
42 0xc0012800, /* CONTEXT_CONTROL */
43 0x80000000,
44 0x80000000,
45
46 0xc0016900, 42 0xc0016900,
47 0x0000023b, 43 0x0000023b,
48 0x00000000, /* SQ_LDS_ALLOC_PS */ 44 0x00000000, /* SQ_LDS_ALLOC_PS */
@@ -63,17 +59,11 @@ const u32 evergreen_default_state[] =
63 0x00000000, 59 0x00000000,
64 0x00000000, 60 0x00000000,
65 61
66 0xc0026f00,
67 0x00000000,
68 0x00000000, /* SQ_VTX_BASE_VTX_LOC */
69 0x00000000,
70
71 0xc0026900, 62 0xc0026900,
72 0x00000010, 63 0x00000010,
73 0x00000000, /* DB_Z_INFO */ 64 0x00000000, /* DB_Z_INFO */
74 0x00000000, /* DB_STENCIL_INFO */ 65 0x00000000, /* DB_STENCIL_INFO */
75 66
76
77 0xc0016900, 67 0xc0016900,
78 0x00000200, 68 0x00000200,
79 0x00000000, /* DB_DEPTH_CONTROL */ 69 0x00000000, /* DB_DEPTH_CONTROL */
@@ -303,11 +293,10 @@ const u32 evergreen_default_state[] =
303 0x00000000, /* */ 293 0x00000000, /* */
304 0x00000000, /* */ 294 0x00000000, /* */
305 295
306 0xc0036e00, /* SET_SAMPLER */ 296 0xc0026900,
307 0x00000000, 297 0x00000316,
308 0x00000012, 298 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
309 0x00000000, 299 0x00000010, /* */
310 0x00000000,
311}; 300};
312 301
313const u32 evergreen_vs[] = 302const u32 evergreen_vs[] =
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index d507f438eed0..113c70cc8b39 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -658,6 +658,8 @@
658#define PACKET3_EVENT_WRITE_EOP 0x47 658#define PACKET3_EVENT_WRITE_EOP 0x47
659#define PACKET3_EVENT_WRITE_EOS 0x48 659#define PACKET3_EVENT_WRITE_EOS 0x48
660#define PACKET3_PREAMBLE_CNTL 0x4A 660#define PACKET3_PREAMBLE_CNTL 0x4A
661# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
662# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
661#define PACKET3_RB_OFFSET 0x4B 663#define PACKET3_RB_OFFSET 0x4B
662#define PACKET3_ALU_PS_CONST_BUFFER_COPY 0x4C 664#define PACKET3_ALU_PS_CONST_BUFFER_COPY 0x4C
663#define PACKET3_ALU_VS_CONST_BUFFER_COPY 0x4D 665#define PACKET3_ALU_VS_CONST_BUFFER_COPY 0x4D