path: root/drivers/gpu/drm/vc4/vc4_validate_shaders.c
author	Eric Anholt <eric@anholt.net>	2016-07-02 15:17:10 -0400
committer	Eric Anholt <eric@anholt.net>	2016-07-15 18:19:50 -0400
commit	6d45c81d229d71da54d374143e7d6abad4c0cf31 (patch)
tree	13a537d8fc387d061cdf2d28f0b6599a80aa3237 /drivers/gpu/drm/vc4/vc4_validate_shaders.c
parent	93aa9ae3e5523e49e4e5abacd4dbee0e4ab2d931 (diff)
drm/vc4: Add support for branching in shader validation.
We're already checking that branch instructions are between the start of the
shader and the proper PROG_END sequence. The other thing we need to make
branching safe is to verify that the shader doesn't read past the end of the
uniforms stream.

To do that, we require that any basic block reading uniforms start with the
following instructions:

	load_imm temp, <next offset within uniform stream>
	add unif_addr, temp, unif

The instructions are generated by userspace, and the kernel verifies that the
load_imm is of the expected offset, and that the add adds it to a uniform. We
track which uniform in the stream that is, and at draw call time fix up the
uniform stream to have the address of the start of the shader's uniforms at
that location.

Signed-off-by: Eric Anholt <eric@anholt.net>
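The draw-call-time fixup itself is not part of this file; a minimal sketch of
what it amounts to, using only the bookkeeping fields this patch adds
(function and parameter names here are illustrative assumptions, not code
from this commit):

	/* For every slot recorded by the validator, write the GPU address
	 * of the start of this shader's uniform stream, so that the
	 * shader's "add unif_addr, temp, unif" reads back a valid base
	 * address.
	 */
	static void
	fixup_uniform_addr_slots(const struct vc4_validated_shader_info *info,
				 u32 *dst_uniforms, u32 uniforms_gpu_addr)
	{
		u32 i;

		/* uniform_addr_offsets[] entries are indices in 4-byte
		 * units into the uniform stream (see
		 * require_uniform_address_uniform() in the diff below).
		 */
		for (i = 0; i < info->num_uniform_addr_offsets; i++)
			dst_uniforms[info->uniform_addr_offsets[i]] =
				uniforms_gpu_addr;
	}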
Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_validate_shaders.c')
-rw-r--r--	drivers/gpu/drm/vc4/vc4_validate_shaders.c	281
1 file changed, 265 insertions(+), 16 deletions(-)
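For context, the userspace half of this contract is met by emitting the
required instruction pair at the top of every uniform-reading basic block. A
hedged sketch of such an emitter (the struct and qpu_emit_*() helpers are
hypothetical stand-ins, not Mesa's actual vc4 compiler API; only the emitted
instruction pair is what the validator below checks for):

	/* Emit the reset pair for a basic block whose uniform-address slot
	 * sits at byte offset addr_slot_offset in the uniform stream.
	 */
	static void
	emit_uniform_addr_reset(struct qpu_emitter *c, uint32_t addr_slot_offset)
	{
		/* load_imm temp, <offset of the uniform following the
		 * address slot>; the validator checks this immediate
		 * against uniforms_size + 4.
		 */
		qpu_emit_load_imm(c, QPU_R0, addr_slot_offset + 4);

		/* add unif_addr, temp, unif: consumes one uniform (the
		 * base address the kernel patched in), so subsequent
		 * uniform reads continue right after the address slot.
		 */
		qpu_emit_add(c, QPU_W_UNIFORMS_ADDRESS, QPU_R0, QPU_R_UNIF);
	}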
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
index b984f123df09..75c8e8fb1827 100644
--- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -39,6 +39,8 @@
 #include "vc4_drv.h"
 #include "vc4_qpu_defines.h"
 
+#define LIVE_REG_COUNT (32 + 32 + 4)
+
 struct vc4_shader_validation_state {
 	/* Current IP being validated. */
 	uint32_t ip;
@@ -57,8 +59,9 @@ struct vc4_shader_validation_state {
 	 *
 	 * This is used for the validation of direct address memory reads.
 	 */
-	uint32_t live_min_clamp_offsets[32 + 32 + 4];
-	bool live_max_clamp_regs[32 + 32 + 4];
+	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
+	bool live_max_clamp_regs[LIVE_REG_COUNT];
+	uint32_t live_immediates[LIVE_REG_COUNT];
 
 	/* Bitfield of which IPs are used as branch targets.
 	 *
@@ -66,6 +69,20 @@ struct vc4_shader_validation_state {
 	 * points and clearing the texturing/clamping state.
 	 */
 	unsigned long *branch_targets;
+
+	/* Set when entering a basic block, and cleared when the uniform
+	 * address update is found. This is used to make sure that we don't
+	 * read uniforms when the address is undefined.
+	 */
+	bool needs_uniform_address_update;
+
+	/* Set when we find a backwards branch. If the branch is backwards,
+	 * the target is probably doing an address reset to read uniforms,
+	 * and so we need to be sure that a uniforms address is present in the
+	 * stream, even if the shader didn't need to read uniforms in later
+	 * basic blocks.
+	 */
+	bool needs_uniform_address_for_loop;
 };
 
 static uint32_t
@@ -227,8 +244,14 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader,
 	/* Since direct uses a RADDR uniform reference, it will get counted in
 	 * check_instruction_reads()
 	 */
-	if (!is_direct)
+	if (!is_direct) {
+		if (validation_state->needs_uniform_address_update) {
+			DRM_ERROR("Texturing with undefined uniform address\n");
+			return false;
+		}
+
 		validated_shader->uniforms_size += 4;
+	}
 
 	if (submit) {
 		if (!record_texture_sample(validated_shader,
@@ -242,6 +265,98 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader,
 	return true;
 }
 
+static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
+{
+	uint32_t o = validated_shader->num_uniform_addr_offsets;
+	uint32_t num_uniforms = validated_shader->uniforms_size / 4;
+
+	validated_shader->uniform_addr_offsets =
+		krealloc(validated_shader->uniform_addr_offsets,
+			 (o + 1) *
+			 sizeof(*validated_shader->uniform_addr_offsets),
+			 GFP_KERNEL);
+	if (!validated_shader->uniform_addr_offsets)
+		return false;
+
+	validated_shader->uniform_addr_offsets[o] = num_uniforms;
+	validated_shader->num_uniform_addr_offsets++;
+
+	return true;
+}
+
+static bool
+validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
+			       struct vc4_shader_validation_state *validation_state,
+			       bool is_mul)
+{
+	uint64_t inst = validation_state->shader[validation_state->ip];
+	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
+	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
+	/* We want our reset to be pointing at whatever uniform follows the
+	 * uniforms base address.
+	 */
+	u32 expected_offset = validated_shader->uniforms_size + 4;
+
+	/* We only support absolute uniform address changes, and we
+	 * require that they be in the current basic block before any
+	 * of its uniform reads.
+	 *
+	 * One could potentially emit more efficient QPU code, by
+	 * noticing that (say) an if statement does uniform control
+	 * flow for all threads and that the if reads the same number
+	 * of uniforms on each side. However, this scheme is easy to
+	 * validate so it's all we allow for now.
+	 */
+
+	if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) {
+		DRM_ERROR("uniforms address change must be "
+			  "normal math\n");
+		return false;
+	}
+
+	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
+		DRM_ERROR("Uniform address reset must be an ADD.\n");
+		return false;
+	}
+
+	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
+		DRM_ERROR("Uniform address reset must be unconditional.\n");
+		return false;
+	}
+
+	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
+	    !(inst & QPU_PM)) {
+		DRM_ERROR("No packing allowed on uniforms reset\n");
+		return false;
+	}
+
+	if (add_lri == -1) {
+		DRM_ERROR("First argument of uniform address write must be "
+			  "an immediate value.\n");
+		return false;
+	}
+
+	if (validation_state->live_immediates[add_lri] != expected_offset) {
+		DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
+			  validation_state->live_immediates[add_lri],
+			  expected_offset);
+		return false;
+	}
+
+	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
+		DRM_ERROR("Second argument of uniform address write must be "
+			  "a uniform.\n");
+		return false;
+	}
+
+	validation_state->needs_uniform_address_update = false;
+	validation_state->needs_uniform_address_for_loop = false;
+	return require_uniform_address_uniform(validated_shader);
+}
+
 static bool
 check_reg_write(struct vc4_validated_shader_info *validated_shader,
 		struct vc4_shader_validation_state *validation_state,
@@ -251,14 +366,37 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader,
 	uint32_t waddr = (is_mul ?
 			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
 			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+	bool ws = inst & QPU_WS;
+	bool is_b = is_mul ^ ws;
+	u32 lri = waddr_to_live_reg_index(waddr, is_b);
+
+	if (lri != -1) {
+		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
+
+		if (sig == QPU_SIG_LOAD_IMM &&
+		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
+		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
+		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
+			validation_state->live_immediates[lri] =
+				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
+		} else {
+			validation_state->live_immediates[lri] = ~0;
+		}
+	}
 
 	switch (waddr) {
 	case QPU_W_UNIFORMS_ADDRESS:
-		/* XXX: We'll probably need to support this for reladdr, but
-		 * it's definitely a security-related one.
-		 */
-		DRM_ERROR("uniforms address load unsupported\n");
-		return false;
+		if (is_b) {
+			DRM_ERROR("relative uniforms address change "
+				  "unsupported\n");
+			return false;
+		}
+
+		return validate_uniform_address_write(validated_shader,
+						      validation_state,
+						      is_mul);
 
 	case QPU_W_TLB_COLOR_MS:
 	case QPU_W_TLB_COLOR_ALL:
@@ -406,9 +544,35 @@ check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
 }
 
 static bool
-check_instruction_reads(uint64_t inst,
-			struct vc4_validated_shader_info *validated_shader)
+check_branch(uint64_t inst,
+	     struct vc4_validated_shader_info *validated_shader,
+	     struct vc4_shader_validation_state *validation_state,
+	     int ip)
+{
+	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
+	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+
+	if ((int)branch_imm < 0)
+		validation_state->needs_uniform_address_for_loop = true;
+
+	/* We don't want to have to worry about validation of this, and
+	 * there's no need for it.
+	 */
+	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
+		DRM_ERROR("branch instruction at %d wrote a register.\n",
+			  validation_state->ip);
+		return false;
+	}
+
+	return true;
+}
+
+static bool
+check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
+			struct vc4_shader_validation_state *validation_state)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
 	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
 	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
@@ -420,6 +584,12 @@ check_instruction_reads(uint64_t inst,
 		 * already be OOM.
 		 */
 		validated_shader->uniforms_size += 4;
+
+		if (validation_state->needs_uniform_address_update) {
+			DRM_ERROR("Uniform read with undefined uniform "
+				  "address\n");
+			return false;
+		}
 	}
 
 	return true;
@@ -516,6 +686,65 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
 	return true;
 }
 
+/* Resets any known state for the shader, used when we may be branched to from
+ * multiple locations in the program (or at shader start).
+ */
+static void
+reset_validation_state(struct vc4_shader_validation_state *validation_state)
+{
+	int i;
+
+	for (i = 0; i < 8; i++)
+		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;
+
+	for (i = 0; i < LIVE_REG_COUNT; i++) {
+		validation_state->live_min_clamp_offsets[i] = ~0;
+		validation_state->live_max_clamp_regs[i] = false;
+		validation_state->live_immediates[i] = ~0;
+	}
+}
+
+static bool
+texturing_in_progress(struct vc4_shader_validation_state *validation_state)
+{
+	return (validation_state->tmu_write_count[0] != 0 ||
+		validation_state->tmu_write_count[1] != 0);
+}
+
+static bool
+vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
+{
+	uint32_t ip = validation_state->ip;
+
+	if (!test_bit(ip, validation_state->branch_targets))
+		return true;
+
+	if (texturing_in_progress(validation_state)) {
+		DRM_ERROR("Branch target landed during TMU setup\n");
+		return false;
+	}
+
+	/* Reset our live values tracking, since this instruction may have
+	 * multiple predecessors.
+	 *
+	 * One could potentially do analysis to determine that, for
+	 * example, all predecessors have a live max clamp in the same
+	 * register, but we don't bother with that.
+	 */
+	reset_validation_state(validation_state);
+
+	/* Since we've entered a basic block from potentially multiple
+	 * predecessors, we need the uniforms address to be updated before any
+	 * uniforms are read. We require that after any branch point, the next
+	 * uniform to be loaded is a uniform address offset. That uniform's
+	 * offset will be marked by the uniform address register write
+	 * validation, or by a one-off end-of-program check.
+	 */
+	validation_state->needs_uniform_address_update = true;
+
+	return true;
+}
+
 struct vc4_validated_shader_info *
 vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 {
@@ -524,16 +753,12 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 	uint32_t ip;
 	struct vc4_validated_shader_info *validated_shader = NULL;
 	struct vc4_shader_validation_state validation_state;
-	int i;
 
 	memset(&validation_state, 0, sizeof(validation_state));
 	validation_state.shader = shader_obj->vaddr;
 	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);
 
-	for (i = 0; i < 8; i++)
-		validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
-	for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
-		validation_state.live_min_clamp_offsets[i] = ~0;
+	reset_validation_state(&validation_state);
 
 	validation_state.branch_targets =
 		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
@@ -554,6 +779,9 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 
 		validation_state.ip = ip;
 
+		if (!vc4_handle_branch_target(&validation_state))
+			goto fail;
+
 		switch (sig) {
 		case QPU_SIG_NONE:
 		case QPU_SIG_WAIT_FOR_SCOREBOARD:
@@ -569,7 +797,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 				goto fail;
 			}
 
-			if (!check_instruction_reads(inst, validated_shader))
+			if (!check_instruction_reads(validated_shader,
+						     &validation_state))
 				goto fail;
 
 			if (sig == QPU_SIG_PROG_END) {
@@ -587,6 +816,11 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 			}
 			break;
 
+		case QPU_SIG_BRANCH:
+			if (!check_branch(inst, validated_shader,
+					  &validation_state, ip))
+				goto fail;
+			break;
 		default:
 			DRM_ERROR("Unsupported QPU signal %d at "
 				  "instruction %d\n", sig, ip);
@@ -607,6 +841,21 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		goto fail;
 	}
 
+	/* If we did a backwards branch and we haven't emitted a uniforms
+	 * reset since then, we still need the uniforms stream to have the
+	 * uniforms address available so that the backwards branch can do its
+	 * uniforms reset.
+	 *
+	 * We could potentially prove that the backwards branch doesn't
+	 * contain any uses of uniforms until program exit, but that doesn't
+	 * seem to be worth the trouble.
+	 */
+	if (validation_state.needs_uniform_address_for_loop) {
+		if (!require_uniform_address_uniform(validated_shader))
+			goto fail;
+		validated_shader->uniforms_size += 4;
+	}
+
 	/* Again, no chance of integer overflow here because the worst case
 	 * scenario is 8 bytes of uniforms plus handles per 8-byte
 	 * instruction.