Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_validate_shaders.c')
-rw-r--r--  drivers/gpu/drm/vc4/vc4_validate_shaders.c | 281
1 file changed, 265 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
index b984f123df09..75c8e8fb1827 100644
--- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -39,6 +39,8 @@
 #include "vc4_drv.h"
 #include "vc4_qpu_defines.h"
 
+#define LIVE_REG_COUNT (32 + 32 + 4)
+
 struct vc4_shader_validation_state {
 	/* Current IP being validated. */
 	uint32_t ip;
@@ -57,8 +59,9 @@ struct vc4_shader_validation_state {
 	 *
 	 * This is used for the validation of direct address memory reads.
 	 */
-	uint32_t live_min_clamp_offsets[32 + 32 + 4];
-	bool live_max_clamp_regs[32 + 32 + 4];
+	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
+	bool live_max_clamp_regs[LIVE_REG_COUNT];
+	uint32_t live_immediates[LIVE_REG_COUNT];
 
 	/* Bitfield of which IPs are used as branch targets.
 	 *
@@ -66,6 +69,20 @@ struct vc4_shader_validation_state {
 	 * points and clearing the texturing/clamping state.
 	 */
 	unsigned long *branch_targets;
+
+	/* Set when entering a basic block, and cleared when the uniform
+	 * address update is found. This is used to make sure that we don't
+	 * read uniforms when the address is undefined.
+	 */
+	bool needs_uniform_address_update;
+
+	/* Set when we find a backwards branch. If the branch is backwards,
+	 * the target is probably doing an address reset to read uniforms,
+	 * and so we need to be sure that a uniforms address is present in the
+	 * stream, even if the shader didn't need to read uniforms in later
+	 * basic blocks.
+	 */
+	bool needs_uniform_address_for_loop;
 };
 
 static uint32_t
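The (32 + 32 + 4) that LIVE_REG_COUNT names is the flat index space the
live_* arrays are keyed by: regfile A, regfile B, then the first four
accumulators. A minimal standalone sketch of that mapping, matching the
waddr_to_live_reg_index() helper this patch calls into (the QPU_W_ACC0/3
values are illustrative assumptions, not copied from vc4_qpu_defines.h):

	#include <stdbool.h>
	#include <stdint.h>

	#define QPU_W_ACC0 32	/* assumed value, for illustration */
	#define QPU_W_ACC3 35	/* assumed value, for illustration */

	/* Flat index into the LIVE_REG_COUNT arrays: 0..31 = regfile A,
	 * 32..63 = regfile B, 64..67 = accumulators r0-r3.  Anything
	 * else is not tracked.
	 */
	static uint32_t
	waddr_to_live_reg_index(uint32_t waddr, bool is_b)
	{
		if (waddr < 32)
			return waddr + (is_b ? 32 : 0);
		else if (waddr >= QPU_W_ACC0 && waddr <= QPU_W_ACC3)
			return 64 + (waddr - QPU_W_ACC0);
		else
			return ~0;
	}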
@@ -227,8 +244,14 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader,
 	/* Since direct uses a RADDR uniform reference, it will get counted in
 	 * check_instruction_reads()
 	 */
-	if (!is_direct)
+	if (!is_direct) {
+		if (validation_state->needs_uniform_address_update) {
+			DRM_ERROR("Texturing with undefined uniform address\n");
+			return false;
+		}
+
 		validated_shader->uniforms_size += 4;
+	}
 
 	if (submit) {
 		if (!record_texture_sample(validated_shader,
@@ -242,6 +265,98 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader,
 	return true;
 }
 
+static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
+{
+	uint32_t o = validated_shader->num_uniform_addr_offsets;
+	uint32_t num_uniforms = validated_shader->uniforms_size / 4;
+
+	validated_shader->uniform_addr_offsets =
+		krealloc(validated_shader->uniform_addr_offsets,
+			 (o + 1) *
+			 sizeof(*validated_shader->uniform_addr_offsets),
+			 GFP_KERNEL);
+	if (!validated_shader->uniform_addr_offsets)
+		return false;
+
+	validated_shader->uniform_addr_offsets[o] = num_uniforms;
+	validated_shader->num_uniform_addr_offsets++;
+
+	return true;
+}
+
+static bool
+validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
+			       struct vc4_shader_validation_state *validation_state,
+			       bool is_mul)
+{
+	uint64_t inst = validation_state->shader[validation_state->ip];
+	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
+	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
+	/* We want our reset to be pointing at whatever uniform follows the
+	 * uniforms base address.
+	 */
+	u32 expected_offset = validated_shader->uniforms_size + 4;
+
+	/* We only support absolute uniform address changes, and we
+	 * require that they be in the current basic block before any
+	 * of its uniform reads.
+	 *
+	 * One could potentially emit more efficient QPU code, by
+	 * noticing that (say) an if statement does uniform control
+	 * flow for all threads and that the if reads the same number
+	 * of uniforms on each side. However, this scheme is easy to
+	 * validate so it's all we allow for now.
+	 */
+
+	if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) {
+		DRM_ERROR("uniforms address change must be "
+			  "normal math\n");
+		return false;
+	}
+
+	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
+		DRM_ERROR("Uniform address reset must be an ADD.\n");
+		return false;
+	}
+
+	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
+		DRM_ERROR("Uniform address reset must be unconditional.\n");
+		return false;
+	}
+
+	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
+	    !(inst & QPU_PM)) {
+		DRM_ERROR("No packing allowed on uniforms reset\n");
+		return false;
+	}
+
+	if (add_lri == -1) {
+		DRM_ERROR("First argument of uniform address write must be "
+			  "an immediate value.\n");
+		return false;
+	}
+
+	if (validation_state->live_immediates[add_lri] != expected_offset) {
+		DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
+			  validation_state->live_immediates[add_lri],
+			  expected_offset);
+		return false;
+	}
+
+	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
+		DRM_ERROR("Second argument of uniform address write must be "
+			  "a uniform.\n");
+		return false;
+	}
+
+	validation_state->needs_uniform_address_update = false;
+	validation_state->needs_uniform_address_for_loop = false;
+	return require_uniform_address_uniform(validated_shader);
+}
+
 static bool
 check_reg_write(struct vc4_validated_shader_info *validated_shader,
 		struct vc4_shader_validation_state *validation_state,
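To make the expected_offset rule concrete: the reset consumes one uniform
(the new base address), so the immediate feeding the ADD must be the byte
offset of the uniform just past that slot. A self-contained sketch of the
arithmetic the validator enforces (standalone userspace code, not part of
the patch):

	#include <stdint.h>
	#include <stdio.h>

	/* live_immediates[add_lri] must equal uniforms_size + 4 at the
	 * reset instruction, i.e. the byte offset of the next uniform
	 * after the base-address slot itself.
	 */
	static uint32_t expected_reset_offset(uint32_t uniforms_read_so_far)
	{
		uint32_t uniforms_size = uniforms_read_so_far * 4;

		return uniforms_size + 4;
	}

	int main(void)
	{
		/* A shader that read 5 uniforms before the reset must
		 * have loaded the immediate 24 = (5 + 1) * 4.
		 */
		printf("%u\n", expected_reset_offset(5));
		return 0;
	}

In QPU terms, the only accepted shape is therefore a LOAD_IMM of that
offset into a tracked register, followed by an unconditional, unpacked ADD
of that register and a uniform read, written to QPU_W_UNIFORMS_ADDRESS.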
@@ -251,14 +366,37 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader,
 	uint32_t waddr = (is_mul ?
 			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
 			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+	bool ws = inst & QPU_WS;
+	bool is_b = is_mul ^ ws;
+	u32 lri = waddr_to_live_reg_index(waddr, is_b);
+
+	if (lri != -1) {
+		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
+
+		if (sig == QPU_SIG_LOAD_IMM &&
+		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
+		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
+		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
+			validation_state->live_immediates[lri] =
+				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
+		} else {
+			validation_state->live_immediates[lri] = ~0;
+		}
+	}
 
 	switch (waddr) {
 	case QPU_W_UNIFORMS_ADDRESS:
-		/* XXX: We'll probably need to support this for reladdr, but
-		 * it's definitely a security-related one.
-		 */
-		DRM_ERROR("uniforms address load unsupported\n");
-		return false;
+		if (is_b) {
+			DRM_ERROR("relative uniforms address change "
+				  "unsupported\n");
+			return false;
+		}
+
+		return validate_uniform_address_write(validated_shader,
+						      validation_state,
+						      is_mul);
 
 	case QPU_W_TLB_COLOR_MS:
 	case QPU_W_TLB_COLOR_ALL:
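The is_b = is_mul ^ ws line above encodes the QPU write-swap rule: the ADD
pipe writes regfile A and the MUL pipe writes regfile B unless the WS bit
is set, which swaps both destinations. A toy truth-table program, derived
only from that one line of the patch:

	#include <stdio.h>

	int main(void)
	{
		int is_mul, ws;

		/* ADD->A and MUL->B by default; WS swaps both. */
		for (is_mul = 0; is_mul <= 1; is_mul++)
			for (ws = 0; ws <= 1; ws++)
				printf("%s pipe, WS=%d -> regfile %c\n",
				       is_mul ? "MUL" : "ADD", ws,
				       (is_mul ^ ws) ? 'B' : 'A');
		return 0;
	}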
@@ -406,9 +544,35 @@ check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
 }
 
 static bool
-check_instruction_reads(uint64_t inst,
-			struct vc4_validated_shader_info *validated_shader)
+check_branch(uint64_t inst,
+	     struct vc4_validated_shader_info *validated_shader,
+	     struct vc4_shader_validation_state *validation_state,
+	     int ip)
+{
+	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
+	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+
+	if ((int)branch_imm < 0)
+		validation_state->needs_uniform_address_for_loop = true;
+
+	/* We don't want to have to worry about validation of this, and
+	 * there's no need for it.
+	 */
+	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
+		DRM_ERROR("branch instruction at %d wrote a register.\n",
+			  validation_state->ip);
+		return false;
+	}
+
+	return true;
+}
+
+static bool
+check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
+			struct vc4_shader_validation_state *validation_state)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
 	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
 	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
@@ -420,6 +584,12 @@ check_instruction_reads(uint64_t inst,
 		 * already be OOM.
 		 */
 		validated_shader->uniforms_size += 4;
+
+		if (validation_state->needs_uniform_address_update) {
+			DRM_ERROR("Uniform read with undefined uniform "
+				  "address\n");
+			return false;
+		}
 	}
 
 	return true;
@@ -516,6 +686,65 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
 	return true;
 }
 
+/* Resets any known state for the shader, used when we may be branched to
+ * from multiple locations in the program (or at shader start).
+ */
+static void
+reset_validation_state(struct vc4_shader_validation_state *validation_state)
+{
+	int i;
+
+	for (i = 0; i < 8; i++)
+		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;
+
+	for (i = 0; i < LIVE_REG_COUNT; i++) {
+		validation_state->live_min_clamp_offsets[i] = ~0;
+		validation_state->live_max_clamp_regs[i] = false;
+		validation_state->live_immediates[i] = ~0;
+	}
+}
+
+static bool
+texturing_in_progress(struct vc4_shader_validation_state *validation_state)
+{
+	return (validation_state->tmu_write_count[0] != 0 ||
+		validation_state->tmu_write_count[1] != 0);
+}
+
+static bool
+vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
+{
+	uint32_t ip = validation_state->ip;
+
+	if (!test_bit(ip, validation_state->branch_targets))
+		return true;
+
+	if (texturing_in_progress(validation_state)) {
+		DRM_ERROR("Branch target landed during TMU setup\n");
+		return false;
+	}
+
+	/* Reset our live values tracking, since this instruction may have
+	 * multiple predecessors.
+	 *
+	 * One could potentially do analysis to determine that, for
+	 * example, all predecessors have a live max clamp in the same
+	 * register, but we don't bother with that.
+	 */
+	reset_validation_state(validation_state);
+
+	/* Since we've entered a basic block from potentially multiple
+	 * predecessors, we need the uniforms address to be updated before
+	 * any uniforms are read. We require that after any branch point,
+	 * the next uniform to be loaded is a uniform address offset. That
+	 * uniform's offset will be marked by the uniform address register
+	 * write validation, or by a one-off check at the end of the program.
+	 */
+	validation_state->needs_uniform_address_update = true;
+
+	return true;
+}
+
 struct vc4_validated_shader_info *
 vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 {
519 | struct vc4_validated_shader_info * | 748 | struct vc4_validated_shader_info * |
520 | vc4_validate_shader(struct drm_gem_cma_object *shader_obj) | 749 | vc4_validate_shader(struct drm_gem_cma_object *shader_obj) |
521 | { | 750 | { |
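vc4_handle_branch_target() runs once per instruction and keys off the
branch_targets bitmap that vc4_validate_branches() fills in, one bit per
64-bit instruction slot (allocated below with kcalloc(BITS_TO_LONGS(...))).
A minimal userspace model of that bitmap, assuming the usual Linux bit
layout (set_bit_ul/test_bit_ul are hypothetical stand-ins for the kernel's
set_bit/test_bit):

	#include <limits.h>
	#include <stdbool.h>
	#include <stdlib.h>

	#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)
	#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

	static void set_bit_ul(unsigned int ip, unsigned long *map)
	{
		map[ip / BITS_PER_LONG] |= 1UL << (ip % BITS_PER_LONG);
	}

	static bool test_bit_ul(unsigned int ip, const unsigned long *map)
	{
		return map[ip / BITS_PER_LONG] & (1UL << (ip % BITS_PER_LONG));
	}

	int main(void)
	{
		unsigned int max_ip = 100;	/* instructions in the shader */
		unsigned long *branch_targets =
			calloc(BITS_TO_LONGS(max_ip), sizeof(unsigned long));

		if (!branch_targets)
			return 1;
		set_bit_ul(42, branch_targets);	/* a branch lands at ip 42 */
		/* At ip 42, the validator resets live state and demands a
		 * uniforms-address reset before the next uniform read.
		 */
		(void)test_bit_ul(42, branch_targets);
		free(branch_targets);
		return 0;
	}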
@@ -524,16 +753,12 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 	uint32_t ip;
 	struct vc4_validated_shader_info *validated_shader = NULL;
 	struct vc4_shader_validation_state validation_state;
-	int i;
 
 	memset(&validation_state, 0, sizeof(validation_state));
 	validation_state.shader = shader_obj->vaddr;
 	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);
 
-	for (i = 0; i < 8; i++)
-		validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
-	for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
-		validation_state.live_min_clamp_offsets[i] = ~0;
+	reset_validation_state(&validation_state);
 
 	validation_state.branch_targets =
 		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
@@ -554,6 +779,9 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 
 		validation_state.ip = ip;
 
+		if (!vc4_handle_branch_target(&validation_state))
+			goto fail;
+
 		switch (sig) {
 		case QPU_SIG_NONE:
 		case QPU_SIG_WAIT_FOR_SCOREBOARD:
@@ -569,7 +797,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 				goto fail;
 			}
 
-			if (!check_instruction_reads(inst, validated_shader))
+			if (!check_instruction_reads(validated_shader,
+						     &validation_state))
 				goto fail;
 
 			if (sig == QPU_SIG_PROG_END) {
@@ -587,6 +816,11 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 			}
 			break;
 
+		case QPU_SIG_BRANCH:
+			if (!check_branch(inst, validated_shader,
+					  &validation_state, ip))
+				goto fail;
+			break;
 		default:
 			DRM_ERROR("Unsupported QPU signal %d at "
 				  "instruction %d\n", sig, ip);
@@ -607,6 +841,21 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		goto fail;
 	}
 
+	/* If we did a backwards branch and we haven't emitted a uniforms
+	 * reset since then, we still need the uniforms stream to have the
+	 * uniforms address available so that the backwards branch can do its
+	 * uniforms reset.
+	 *
+	 * We could potentially prove that the backwards branch doesn't
+	 * contain any uses of uniforms until program exit, but that doesn't
+	 * seem to be worth the trouble.
+	 */
+	if (validation_state.needs_uniform_address_for_loop) {
+		if (!require_uniform_address_uniform(validated_shader))
+			goto fail;
+		validated_shader->uniforms_size += 4;
+	}
+
 	/* Again, no chance of integer overflow here because the worst case
 	 * scenario is 8 bytes of uniforms plus handles per 8-byte
 	 * instruction.
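For context, uniform_addr_offsets ends up recording which slots in the
uniform stream hold a uniforms-stream address rather than user data. How
the submit path consumes that array is outside this file; the following is
only a hypothetical sketch of such a patching step (gpu_uniforms and
uniforms_stream_addr are invented names for illustration):

	#include <stdint.h>

	/* Hypothetical: write the GPU address of the uniforms stream into
	 * each recorded slot, so the shader's reset ADD (slot value plus
	 * the tracked immediate) points the QPU at the uniform following
	 * that slot.
	 */
	static void patch_uniform_addresses(uint32_t *gpu_uniforms,
					    uint32_t uniforms_stream_addr,
					    const uint32_t *uniform_addr_offsets,
					    uint32_t num_uniform_addr_offsets)
	{
		uint32_t i;

		for (i = 0; i < num_uniform_addr_offsets; i++)
			gpu_uniforms[uniform_addr_offsets[i]] =
				uniforms_stream_addr;
	}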