diff options
author | Alex Deucher <alexdeucher@gmail.com> | 2010-10-21 13:31:38 -0400 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2010-10-26 00:42:39 -0400 |
commit | 2281a378e1830d7ab78d3067f228e4e55d368b0d (patch) | |
tree | 8fce4594a1d1b8854d219f5297ccb5ecfc263b10 /drivers/gpu/drm | |
parent | c3cceeddf0b5f97b0d2352b98ef0f025e31a9ae3 (diff) |
drm/radeon/kms/evergreen: set the clear state to the blit state
The hw stores a default clear state for registers in the context
range; this clear state can be initialized when the CP is set up.
Set the blit state as the default clear state and use the
CLEAR_STATE packet to load the blit state rather than loading it
from an IB. This reduces overhead when doing bo (buffer object)
moves using the 3D engine.
Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- | drivers/gpu/drm/radeon/evergreen.c | 38 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/evergreen_blit_kms.c | 60 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/evergreen_blit_shaders.c | 19 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/evergreend.h | 2 |
4 files changed, 65 insertions, 54 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 17b2fe925ce0..f12a5b3ec050 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include "atom.h" | 32 | #include "atom.h" |
33 | #include "avivod.h" | 33 | #include "avivod.h" |
34 | #include "evergreen_reg.h" | 34 | #include "evergreen_reg.h" |
35 | #include "evergreen_blit_shaders.h" | ||
35 | 36 | ||
36 | #define EVERGREEN_PFP_UCODE_SIZE 1120 | 37 | #define EVERGREEN_PFP_UCODE_SIZE 1120 |
37 | #define EVERGREEN_PM4_UCODE_SIZE 1376 | 38 | #define EVERGREEN_PM4_UCODE_SIZE 1376 |
@@ -1112,7 +1113,7 @@ static int evergreen_cp_load_microcode(struct radeon_device *rdev) | |||
1112 | 1113 | ||
1113 | static int evergreen_cp_start(struct radeon_device *rdev) | 1114 | static int evergreen_cp_start(struct radeon_device *rdev) |
1114 | { | 1115 | { |
1115 | int r; | 1116 | int r, i; |
1116 | uint32_t cp_me; | 1117 | uint32_t cp_me; |
1117 | 1118 | ||
1118 | r = radeon_ring_lock(rdev, 7); | 1119 | r = radeon_ring_lock(rdev, 7); |
@@ -1132,16 +1133,39 @@ static int evergreen_cp_start(struct radeon_device *rdev) | |||
1132 | cp_me = 0xff; | 1133 | cp_me = 0xff; |
1133 | WREG32(CP_ME_CNTL, cp_me); | 1134 | WREG32(CP_ME_CNTL, cp_me); |
1134 | 1135 | ||
1135 | r = radeon_ring_lock(rdev, 4); | 1136 | r = radeon_ring_lock(rdev, evergreen_default_size + 15); |
1136 | if (r) { | 1137 | if (r) { |
1137 | DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); | 1138 | DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); |
1138 | return r; | 1139 | return r; |
1139 | } | 1140 | } |
1140 | /* init some VGT regs */ | 1141 | |
1141 | radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); | 1142 | /* setup clear context state */ |
1142 | radeon_ring_write(rdev, (VGT_VERTEX_REUSE_BLOCK_CNTL - PACKET3_SET_CONTEXT_REG_START) >> 2); | 1143 | radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); |
1143 | radeon_ring_write(rdev, 0xe); | 1144 | radeon_ring_write(rdev, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); |
1144 | radeon_ring_write(rdev, 0x10); | 1145 | |
1146 | for (i = 0; i < evergreen_default_size; i++) | ||
1147 | radeon_ring_write(rdev, evergreen_default_state[i]); | ||
1148 | |||
1149 | radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); | ||
1150 | radeon_ring_write(rdev, PACKET3_PREAMBLE_END_CLEAR_STATE); | ||
1151 | |||
1152 | /* set clear context state */ | ||
1153 | radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0)); | ||
1154 | radeon_ring_write(rdev, 0); | ||
1155 | |||
1156 | /* SQ_VTX_BASE_VTX_LOC */ | ||
1157 | radeon_ring_write(rdev, 0xc0026f00); | ||
1158 | radeon_ring_write(rdev, 0x00000000); | ||
1159 | radeon_ring_write(rdev, 0x00000000); | ||
1160 | radeon_ring_write(rdev, 0x00000000); | ||
1161 | |||
1162 | /* Clear consts */ | ||
1163 | radeon_ring_write(rdev, 0xc0036f00); | ||
1164 | radeon_ring_write(rdev, 0x00000bc4); | ||
1165 | radeon_ring_write(rdev, 0xffffffff); | ||
1166 | radeon_ring_write(rdev, 0xffffffff); | ||
1167 | radeon_ring_write(rdev, 0xffffffff); | ||
1168 | |||
1145 | radeon_ring_unlock_commit(rdev); | 1169 | radeon_ring_unlock_commit(rdev); |
1146 | 1170 | ||
1147 | return 0; | 1171 | return 0; |
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index a9825aa324b4..086b9b0416c4 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c | |||
@@ -230,7 +230,7 @@ draw_auto(struct radeon_device *rdev) | |||
230 | 230 | ||
231 | } | 231 | } |
232 | 232 | ||
233 | /* emits 20 */ | 233 | /* emits 30 */ |
234 | static void | 234 | static void |
235 | set_default_state(struct radeon_device *rdev) | 235 | set_default_state(struct radeon_device *rdev) |
236 | { | 236 | { |
@@ -243,8 +243,6 @@ set_default_state(struct radeon_device *rdev) | |||
243 | int num_hs_threads, num_ls_threads; | 243 | int num_hs_threads, num_ls_threads; |
244 | int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; | 244 | int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; |
245 | int num_hs_stack_entries, num_ls_stack_entries; | 245 | int num_hs_stack_entries, num_ls_stack_entries; |
246 | u64 gpu_addr; | ||
247 | int dwords; | ||
248 | 246 | ||
249 | switch (rdev->family) { | 247 | switch (rdev->family) { |
250 | case CHIP_CEDAR: | 248 | case CHIP_CEDAR: |
@@ -369,13 +367,9 @@ set_default_state(struct radeon_device *rdev) | |||
369 | sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) | | 367 | sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) | |
370 | NUM_LS_STACK_ENTRIES(num_ls_stack_entries)); | 368 | NUM_LS_STACK_ENTRIES(num_ls_stack_entries)); |
371 | 369 | ||
372 | /* emit an IB pointing at default state */ | 370 | /* set clear context state */ |
373 | dwords = ALIGN(rdev->r600_blit.state_len, 0x10); | 371 | radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0)); |
374 | gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; | 372 | radeon_ring_write(rdev, 0); |
375 | radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); | ||
376 | radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC); | ||
377 | radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF); | ||
378 | radeon_ring_write(rdev, dwords); | ||
379 | 373 | ||
380 | /* disable dyn gprs */ | 374 | /* disable dyn gprs */ |
381 | radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); | 375 | radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); |
@@ -396,6 +390,25 @@ set_default_state(struct radeon_device *rdev) | |||
396 | radeon_ring_write(rdev, sq_stack_resource_mgmt_1); | 390 | radeon_ring_write(rdev, sq_stack_resource_mgmt_1); |
397 | radeon_ring_write(rdev, sq_stack_resource_mgmt_2); | 391 | radeon_ring_write(rdev, sq_stack_resource_mgmt_2); |
398 | radeon_ring_write(rdev, sq_stack_resource_mgmt_3); | 392 | radeon_ring_write(rdev, sq_stack_resource_mgmt_3); |
393 | |||
394 | /* CONTEXT_CONTROL */ | ||
395 | radeon_ring_write(rdev, 0xc0012800); | ||
396 | radeon_ring_write(rdev, 0x80000000); | ||
397 | radeon_ring_write(rdev, 0x80000000); | ||
398 | |||
399 | /* SQ_VTX_BASE_VTX_LOC */ | ||
400 | radeon_ring_write(rdev, 0xc0026f00); | ||
401 | radeon_ring_write(rdev, 0x00000000); | ||
402 | radeon_ring_write(rdev, 0x00000000); | ||
403 | radeon_ring_write(rdev, 0x00000000); | ||
404 | |||
405 | /* SET_SAMPLER */ | ||
406 | radeon_ring_write(rdev, 0xc0036e00); | ||
407 | radeon_ring_write(rdev, 0x00000000); | ||
408 | radeon_ring_write(rdev, 0x00000012); | ||
409 | radeon_ring_write(rdev, 0x00000000); | ||
410 | radeon_ring_write(rdev, 0x00000000); | ||
411 | |||
399 | } | 412 | } |
400 | 413 | ||
401 | static inline uint32_t i2f(uint32_t input) | 414 | static inline uint32_t i2f(uint32_t input) |
@@ -426,10 +439,8 @@ static inline uint32_t i2f(uint32_t input) | |||
426 | int evergreen_blit_init(struct radeon_device *rdev) | 439 | int evergreen_blit_init(struct radeon_device *rdev) |
427 | { | 440 | { |
428 | u32 obj_size; | 441 | u32 obj_size; |
429 | int r, dwords; | 442 | int r; |
430 | void *ptr; | 443 | void *ptr; |
431 | u32 packet2s[16]; | ||
432 | int num_packet2s = 0; | ||
433 | 444 | ||
434 | /* pin copy shader into vram if already initialized */ | 445 | /* pin copy shader into vram if already initialized */ |
435 | if (rdev->r600_blit.shader_obj) | 446 | if (rdev->r600_blit.shader_obj) |
@@ -437,17 +448,8 @@ int evergreen_blit_init(struct radeon_device *rdev) | |||
437 | 448 | ||
438 | mutex_init(&rdev->r600_blit.mutex); | 449 | mutex_init(&rdev->r600_blit.mutex); |
439 | rdev->r600_blit.state_offset = 0; | 450 | rdev->r600_blit.state_offset = 0; |
440 | 451 | rdev->r600_blit.state_len = 0; | |
441 | rdev->r600_blit.state_len = evergreen_default_size; | 452 | obj_size = 0; |
442 | |||
443 | dwords = rdev->r600_blit.state_len; | ||
444 | while (dwords & 0xf) { | ||
445 | packet2s[num_packet2s++] = PACKET2(0); | ||
446 | dwords++; | ||
447 | } | ||
448 | |||
449 | obj_size = dwords * 4; | ||
450 | obj_size = ALIGN(obj_size, 256); | ||
451 | 453 | ||
452 | rdev->r600_blit.vs_offset = obj_size; | 454 | rdev->r600_blit.vs_offset = obj_size; |
453 | obj_size += evergreen_vs_size * 4; | 455 | obj_size += evergreen_vs_size * 4; |
@@ -477,12 +479,6 @@ int evergreen_blit_init(struct radeon_device *rdev) | |||
477 | return r; | 479 | return r; |
478 | } | 480 | } |
479 | 481 | ||
480 | memcpy_toio(ptr + rdev->r600_blit.state_offset, | ||
481 | evergreen_default_state, rdev->r600_blit.state_len * 4); | ||
482 | |||
483 | if (num_packet2s) | ||
484 | memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), | ||
485 | packet2s, num_packet2s * 4); | ||
486 | memcpy(ptr + rdev->r600_blit.vs_offset, evergreen_vs, evergreen_vs_size * 4); | 482 | memcpy(ptr + rdev->r600_blit.vs_offset, evergreen_vs, evergreen_vs_size * 4); |
487 | memcpy(ptr + rdev->r600_blit.ps_offset, evergreen_ps, evergreen_ps_size * 4); | 483 | memcpy(ptr + rdev->r600_blit.ps_offset, evergreen_ps, evergreen_ps_size * 4); |
488 | radeon_bo_kunmap(rdev->r600_blit.shader_obj); | 484 | radeon_bo_kunmap(rdev->r600_blit.shader_obj); |
@@ -566,7 +562,7 @@ int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) | |||
566 | /* calculate number of loops correctly */ | 562 | /* calculate number of loops correctly */ |
567 | ring_size = num_loops * dwords_per_loop; | 563 | ring_size = num_loops * dwords_per_loop; |
568 | /* set default + shaders */ | 564 | /* set default + shaders */ |
569 | ring_size += 36; /* shaders + def state */ | 565 | ring_size += 46; /* shaders + def state */ |
570 | ring_size += 10; /* fence emit for VB IB */ | 566 | ring_size += 10; /* fence emit for VB IB */ |
571 | ring_size += 5; /* done copy */ | 567 | ring_size += 5; /* done copy */ |
572 | ring_size += 10; /* fence emit for done copy */ | 568 | ring_size += 10; /* fence emit for done copy */ |
@@ -574,7 +570,7 @@ int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) | |||
574 | if (r) | 570 | if (r) |
575 | return r; | 571 | return r; |
576 | 572 | ||
577 | set_default_state(rdev); /* 20 */ | 573 | set_default_state(rdev); /* 30 */ |
578 | set_shaders(rdev); /* 16 */ | 574 | set_shaders(rdev); /* 16 */ |
579 | return 0; | 575 | return 0; |
580 | } | 576 | } |
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_shaders.c b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c index 5d5045027b46..ef1d28c07fbf 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_shaders.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c | |||
@@ -39,10 +39,6 @@ | |||
39 | 39 | ||
40 | const u32 evergreen_default_state[] = | 40 | const u32 evergreen_default_state[] = |
41 | { | 41 | { |
42 | 0xc0012800, /* CONTEXT_CONTROL */ | ||
43 | 0x80000000, | ||
44 | 0x80000000, | ||
45 | |||
46 | 0xc0016900, | 42 | 0xc0016900, |
47 | 0x0000023b, | 43 | 0x0000023b, |
48 | 0x00000000, /* SQ_LDS_ALLOC_PS */ | 44 | 0x00000000, /* SQ_LDS_ALLOC_PS */ |
@@ -63,17 +59,11 @@ const u32 evergreen_default_state[] = | |||
63 | 0x00000000, | 59 | 0x00000000, |
64 | 0x00000000, | 60 | 0x00000000, |
65 | 61 | ||
66 | 0xc0026f00, | ||
67 | 0x00000000, | ||
68 | 0x00000000, /* SQ_VTX_BASE_VTX_LOC */ | ||
69 | 0x00000000, | ||
70 | |||
71 | 0xc0026900, | 62 | 0xc0026900, |
72 | 0x00000010, | 63 | 0x00000010, |
73 | 0x00000000, /* DB_Z_INFO */ | 64 | 0x00000000, /* DB_Z_INFO */ |
74 | 0x00000000, /* DB_STENCIL_INFO */ | 65 | 0x00000000, /* DB_STENCIL_INFO */ |
75 | 66 | ||
76 | |||
77 | 0xc0016900, | 67 | 0xc0016900, |
78 | 0x00000200, | 68 | 0x00000200, |
79 | 0x00000000, /* DB_DEPTH_CONTROL */ | 69 | 0x00000000, /* DB_DEPTH_CONTROL */ |
@@ -303,11 +293,10 @@ const u32 evergreen_default_state[] = | |||
303 | 0x00000000, /* */ | 293 | 0x00000000, /* */ |
304 | 0x00000000, /* */ | 294 | 0x00000000, /* */ |
305 | 295 | ||
306 | 0xc0036e00, /* SET_SAMPLER */ | 296 | 0xc0026900, |
307 | 0x00000000, | 297 | 0x00000316, |
308 | 0x00000012, | 298 | 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ |
309 | 0x00000000, | 299 | 0x00000010, /* */ |
310 | 0x00000000, | ||
311 | }; | 300 | }; |
312 | 301 | ||
313 | const u32 evergreen_vs[] = | 302 | const u32 evergreen_vs[] = |
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index d507f438eed0..113c70cc8b39 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h | |||
@@ -658,6 +658,8 @@ | |||
658 | #define PACKET3_EVENT_WRITE_EOP 0x47 | 658 | #define PACKET3_EVENT_WRITE_EOP 0x47 |
659 | #define PACKET3_EVENT_WRITE_EOS 0x48 | 659 | #define PACKET3_EVENT_WRITE_EOS 0x48 |
660 | #define PACKET3_PREAMBLE_CNTL 0x4A | 660 | #define PACKET3_PREAMBLE_CNTL 0x4A |
661 | # define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) | ||
662 | # define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) | ||
661 | #define PACKET3_RB_OFFSET 0x4B | 663 | #define PACKET3_RB_OFFSET 0x4B |
662 | #define PACKET3_ALU_PS_CONST_BUFFER_COPY 0x4C | 664 | #define PACKET3_ALU_PS_CONST_BUFFER_COPY 0x4C |
663 | #define PACKET3_ALU_VS_CONST_BUFFER_COPY 0x4D | 665 | #define PACKET3_ALU_VS_CONST_BUFFER_COPY 0x4D |