diff options
author | Eric Anholt <eric@anholt.net> | 2016-07-02 13:10:24 -0400 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2016-07-15 18:19:12 -0400 |
commit | 93aa9ae3e5523e49e4e5abacd4dbee0e4ab2d931 (patch) | |
tree | 8e226bfa2a0fb60daaeee74298f80b49e0ae37ce /drivers/gpu/drm/vc4/vc4_validate_shaders.c | |
parent | d0566c2a2f2baacefe1eb75be8a001fdd6fe84a3 (diff) |
drm/vc4: Add a bitmap of branch targets during shader validation.
This isn't used yet; it's just a first step toward loop validation.
During the main parsing of instructions, we need to know when we hit a
new basic block so that we can reset validated state.
v2: Fix a stray semicolon after an if block. (caught by kbuild test).
Signed-off-by: Eric Anholt <eric@anholt.net>
Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_validate_shaders.c')
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_validate_shaders.c | 114 |
1 files changed, 112 insertions, 2 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index 771d904653f2..b984f123df09 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c | |||
@@ -59,6 +59,13 @@ struct vc4_shader_validation_state { | |||
59 | */ | 59 | */ |
60 | uint32_t live_min_clamp_offsets[32 + 32 + 4]; | 60 | uint32_t live_min_clamp_offsets[32 + 32 + 4]; |
61 | bool live_max_clamp_regs[32 + 32 + 4]; | 61 | bool live_max_clamp_regs[32 + 32 + 4]; |
62 | |||
63 | /* Bitfield of which IPs are used as branch targets. | ||
64 | * | ||
65 | * Used for validation that the uniform stream is updated at the right | ||
66 | * points and clearing the texturing/clamping state. | ||
67 | */ | ||
68 | unsigned long *branch_targets; | ||
62 | }; | 69 | }; |
63 | 70 | ||
64 | static uint32_t | 71 | static uint32_t |
@@ -418,13 +425,104 @@ check_instruction_reads(uint64_t inst, | |||
418 | return true; | 425 | return true; |
419 | } | 426 | } |
420 | 427 | ||
428 | /* Make sure that all branches are absolute and point within the shader, and | ||
429 | * note their targets for later. | ||
430 | */ | ||
431 | static bool | ||
432 | vc4_validate_branches(struct vc4_shader_validation_state *validation_state) | ||
433 | { | ||
434 | uint32_t max_branch_target = 0; | ||
435 | bool found_shader_end = false; | ||
436 | int ip; | ||
437 | int shader_end_ip = 0; | ||
438 | int last_branch = -2; | ||
439 | |||
440 | for (ip = 0; ip < validation_state->max_ip; ip++) { | ||
441 | uint64_t inst = validation_state->shader[ip]; | ||
442 | int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET); | ||
443 | uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); | ||
444 | uint32_t after_delay_ip = ip + 4; | ||
445 | uint32_t branch_target_ip; | ||
446 | |||
447 | if (sig == QPU_SIG_PROG_END) { | ||
448 | shader_end_ip = ip; | ||
449 | found_shader_end = true; | ||
450 | continue; | ||
451 | } | ||
452 | |||
453 | if (sig != QPU_SIG_BRANCH) | ||
454 | continue; | ||
455 | |||
456 | if (ip - last_branch < 4) { | ||
457 | DRM_ERROR("Branch at %d during delay slots\n", ip); | ||
458 | return false; | ||
459 | } | ||
460 | last_branch = ip; | ||
461 | |||
462 | if (inst & QPU_BRANCH_REG) { | ||
463 | DRM_ERROR("branching from register relative " | ||
464 | "not supported\n"); | ||
465 | return false; | ||
466 | } | ||
467 | |||
468 | if (!(inst & QPU_BRANCH_REL)) { | ||
469 | DRM_ERROR("relative branching required\n"); | ||
470 | return false; | ||
471 | } | ||
472 | |||
473 | /* The actual branch target is the instruction after the delay | ||
474 | * slots, plus whatever byte offset is in the low 32 bits of | ||
475 | * the instruction. Make sure we're not branching beyond the | ||
476 | * end of the shader object. | ||
477 | */ | ||
478 | if (branch_imm % sizeof(inst) != 0) { | ||
479 | DRM_ERROR("branch target not aligned\n"); | ||
480 | return false; | ||
481 | } | ||
482 | |||
483 | branch_target_ip = after_delay_ip + (branch_imm >> 3); | ||
484 | if (branch_target_ip >= validation_state->max_ip) { | ||
485 | DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n", | ||
486 | ip, branch_target_ip, | ||
487 | validation_state->max_ip); | ||
488 | return false; | ||
489 | } | ||
490 | set_bit(branch_target_ip, validation_state->branch_targets); | ||
491 | |||
492 | /* Make sure that the non-branching path is also not outside | ||
493 | * the shader. | ||
494 | */ | ||
495 | if (after_delay_ip >= validation_state->max_ip) { | ||
496 | DRM_ERROR("Branch at %d continues past shader end " | ||
497 | "(%d/%d)\n", | ||
498 | ip, after_delay_ip, validation_state->max_ip); | ||
499 | return false; | ||
500 | } | ||
501 | set_bit(after_delay_ip, validation_state->branch_targets); | ||
502 | max_branch_target = max(max_branch_target, after_delay_ip); | ||
503 | |||
504 | /* There are two delay slots after program end is signaled | ||
505 | * that are still executed, then we're finished. | ||
506 | */ | ||
507 | if (found_shader_end && ip == shader_end_ip + 2) | ||
508 | break; | ||
509 | } | ||
510 | |||
511 | if (max_branch_target > shader_end_ip) { | ||
512 | DRM_ERROR("Branch landed after QPU_SIG_PROG_END"); | ||
513 | return false; | ||
514 | } | ||
515 | |||
516 | return true; | ||
517 | } | ||
518 | |||
421 | struct vc4_validated_shader_info * | 519 | struct vc4_validated_shader_info * |
422 | vc4_validate_shader(struct drm_gem_cma_object *shader_obj) | 520 | vc4_validate_shader(struct drm_gem_cma_object *shader_obj) |
423 | { | 521 | { |
424 | bool found_shader_end = false; | 522 | bool found_shader_end = false; |
425 | int shader_end_ip = 0; | 523 | int shader_end_ip = 0; |
426 | uint32_t ip; | 524 | uint32_t ip; |
427 | struct vc4_validated_shader_info *validated_shader; | 525 | struct vc4_validated_shader_info *validated_shader = NULL; |
428 | struct vc4_shader_validation_state validation_state; | 526 | struct vc4_shader_validation_state validation_state; |
429 | int i; | 527 | int i; |
430 | 528 | ||
@@ -437,9 +535,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) | |||
437 | for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++) | 535 | for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++) |
438 | validation_state.live_min_clamp_offsets[i] = ~0; | 536 | validation_state.live_min_clamp_offsets[i] = ~0; |
439 | 537 | ||
538 | validation_state.branch_targets = | ||
539 | kcalloc(BITS_TO_LONGS(validation_state.max_ip), | ||
540 | sizeof(unsigned long), GFP_KERNEL); | ||
541 | if (!validation_state.branch_targets) | ||
542 | goto fail; | ||
543 | |||
440 | validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL); | 544 | validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL); |
441 | if (!validated_shader) | 545 | if (!validated_shader) |
442 | return NULL; | 546 | goto fail; |
547 | |||
548 | if (!vc4_validate_branches(&validation_state)) | ||
549 | goto fail; | ||
443 | 550 | ||
444 | for (ip = 0; ip < validation_state.max_ip; ip++) { | 551 | for (ip = 0; ip < validation_state.max_ip; ip++) { |
445 | uint64_t inst = validation_state.shader[ip]; | 552 | uint64_t inst = validation_state.shader[ip]; |
@@ -508,9 +615,12 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) | |||
508 | (validated_shader->uniforms_size + | 615 | (validated_shader->uniforms_size + |
509 | 4 * validated_shader->num_texture_samples); | 616 | 4 * validated_shader->num_texture_samples); |
510 | 617 | ||
618 | kfree(validation_state.branch_targets); | ||
619 | |||
511 | return validated_shader; | 620 | return validated_shader; |
512 | 621 | ||
513 | fail: | 622 | fail: |
623 | kfree(validation_state.branch_targets); | ||
514 | if (validated_shader) { | 624 | if (validated_shader) { |
515 | kfree(validated_shader->texture_samples); | 625 | kfree(validated_shader->texture_samples); |
516 | kfree(validated_shader); | 626 | kfree(validated_shader); |