aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/vc4/vc4_validate_shaders.c
diff options
context:
space:
mode:
authorJonas Pfeil <pfeiljonas@gmx.de>2016-11-07 18:18:39 -0500
committerEric Anholt <eric@anholt.net>2016-11-16 16:25:26 -0500
commitc778cc5df944291dcdb1ca7a6bb781fbc22550c5 (patch)
tree98fc974a019af9af6975499fc68c267f78a42f34 /drivers/gpu/drm/vc4/vc4_validate_shaders.c
parent3a62234680d86efa0239665ed8a0e908f1aef147 (diff)
drm/vc4: Add fragment shader threading support
FS threading brings performance improvements of 0-20% in glmark2. The validation code checks for thread switch signals and ensures that the registers of the other thread are not touched, and that our clamps are not live across thread switches. It also checks that the threading and branching instructions do not interfere. (Original patch by Jonas, changes by anholt for style cleanup, removing validation the kernel doesn't need to do, and adding the flag for userspace). v2: Minor style fixes from checkpatch. Signed-off-by: Jonas Pfeil <pfeiljonas@gmx.de> Signed-off-by: Eric Anholt <eric@anholt.net>
Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_validate_shaders.c')
-rw-r--r--drivers/gpu/drm/vc4/vc4_validate_shaders.c63
1 files changed, 63 insertions, 0 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
index 917321ce832f..5dba13dd1e9b 100644
--- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -83,6 +83,13 @@ struct vc4_shader_validation_state {
83 * basic blocks. 83 * basic blocks.
84 */ 84 */
85 bool needs_uniform_address_for_loop; 85 bool needs_uniform_address_for_loop;
86
87 /* Set when we find an instruction writing the top half of the
88 * register files. If we allowed writing the unusable regs in
89 * a threaded shader, then the clamp validation of the other
90 * shader running on our QPU would be invalid.
91 */
92 bool all_registers_used;
86}; 93};
87 94
88static uint32_t 95static uint32_t
@@ -119,6 +126,13 @@ raddr_add_a_to_live_reg_index(uint64_t inst)
119} 126}
120 127
/*
 * Report whether live register index @lri lies in the upper half of
 * either of the two 32-entry index ranges checked here, i.e. in
 * [16, 32) or in [32 + 16, 32 + 32).
 *
 * NOTE(review): presumably indices 0-31 correspond to regfile A and
 * 32-63 to regfile B, with higher indices used for the accumulators;
 * confirm against the raddr/waddr-to-live-reg-index helpers.
 *
 * Returns true for an index in one of those upper-half ranges, false
 * for every other value.
 */
static bool
live_reg_is_upper_half(uint32_t lri)
{
	return (lri >= 16 && lri < 32) ||
		(lri >= 32 + 16 && lri < 32 + 32);
}
134
135static bool
122is_tmu_submit(uint32_t waddr) 136is_tmu_submit(uint32_t waddr)
123{ 137{
124 return (waddr == QPU_W_TMU0_S || 138 return (waddr == QPU_W_TMU0_S ||
@@ -390,6 +404,9 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader,
390 } else { 404 } else {
391 validation_state->live_immediates[lri] = ~0; 405 validation_state->live_immediates[lri] = ~0;
392 } 406 }
407
408 if (live_reg_is_upper_half(lri))
409 validation_state->all_registers_used = true;
393 } 410 }
394 411
395 switch (waddr) { 412 switch (waddr) {
@@ -598,6 +615,11 @@ check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
598 } 615 }
599 } 616 }
600 617
618 if ((raddr_a >= 16 && raddr_a < 32) ||
619 (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
620 validation_state->all_registers_used = true;
621 }
622
601 return true; 623 return true;
602} 624}
603 625
@@ -753,6 +775,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
753{ 775{
754 bool found_shader_end = false; 776 bool found_shader_end = false;
755 int shader_end_ip = 0; 777 int shader_end_ip = 0;
778 uint32_t last_thread_switch_ip = -3;
756 uint32_t ip; 779 uint32_t ip;
757 struct vc4_validated_shader_info *validated_shader = NULL; 780 struct vc4_validated_shader_info *validated_shader = NULL;
758 struct vc4_shader_validation_state validation_state; 781 struct vc4_shader_validation_state validation_state;
@@ -785,6 +808,17 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
785 if (!vc4_handle_branch_target(&validation_state)) 808 if (!vc4_handle_branch_target(&validation_state))
786 goto fail; 809 goto fail;
787 810
811 if (ip == last_thread_switch_ip + 3) {
812 /* Reset r0-r3 live clamp data */
813 int i;
814
815 for (i = 64; i < LIVE_REG_COUNT; i++) {
816 validation_state.live_min_clamp_offsets[i] = ~0;
817 validation_state.live_max_clamp_regs[i] = false;
818 validation_state.live_immediates[i] = ~0;
819 }
820 }
821
788 switch (sig) { 822 switch (sig) {
789 case QPU_SIG_NONE: 823 case QPU_SIG_NONE:
790 case QPU_SIG_WAIT_FOR_SCOREBOARD: 824 case QPU_SIG_WAIT_FOR_SCOREBOARD:
@@ -794,6 +828,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
794 case QPU_SIG_LOAD_TMU1: 828 case QPU_SIG_LOAD_TMU1:
795 case QPU_SIG_PROG_END: 829 case QPU_SIG_PROG_END:
796 case QPU_SIG_SMALL_IMM: 830 case QPU_SIG_SMALL_IMM:
831 case QPU_SIG_THREAD_SWITCH:
832 case QPU_SIG_LAST_THREAD_SWITCH:
797 if (!check_instruction_writes(validated_shader, 833 if (!check_instruction_writes(validated_shader,
798 &validation_state)) { 834 &validation_state)) {
799 DRM_ERROR("Bad write at ip %d\n", ip); 835 DRM_ERROR("Bad write at ip %d\n", ip);
@@ -809,6 +845,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
809 shader_end_ip = ip; 845 shader_end_ip = ip;
810 } 846 }
811 847
848 if (sig == QPU_SIG_THREAD_SWITCH ||
849 sig == QPU_SIG_LAST_THREAD_SWITCH) {
850 validated_shader->is_threaded = true;
851
852 if (ip < last_thread_switch_ip + 3) {
853 DRM_ERROR("Thread switch too soon after "
854 "last switch at ip %d\n", ip);
855 goto fail;
856 }
857 last_thread_switch_ip = ip;
858 }
859
812 break; 860 break;
813 861
814 case QPU_SIG_LOAD_IMM: 862 case QPU_SIG_LOAD_IMM:
@@ -823,6 +871,13 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
823 if (!check_branch(inst, validated_shader, 871 if (!check_branch(inst, validated_shader,
824 &validation_state, ip)) 872 &validation_state, ip))
825 goto fail; 873 goto fail;
874
875 if (ip < last_thread_switch_ip + 3) {
876 DRM_ERROR("Branch in thread switch at ip %d",
877 ip);
878 goto fail;
879 }
880
826 break; 881 break;
827 default: 882 default:
828 DRM_ERROR("Unsupported QPU signal %d at " 883 DRM_ERROR("Unsupported QPU signal %d at "
@@ -844,6 +899,14 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
844 goto fail; 899 goto fail;
845 } 900 }
846 901
902 /* Might corrupt other thread */
903 if (validated_shader->is_threaded &&
904 validation_state.all_registers_used) {
905 DRM_ERROR("Shader uses threading, but uses the upper "
906 "half of the registers, too\n");
907 goto fail;
908 }
909
847 /* If we did a backwards branch and we haven't emitted a uniforms 910 /* If we did a backwards branch and we haven't emitted a uniforms
848 * reset since then, we still need the uniforms stream to have the 911 * reset since then, we still need the uniforms stream to have the
849 * uniforms address available so that the backwards branch can do its 912 * uniforms address available so that the backwards branch can do its