diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 93 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 4 |
2 files changed, 24 insertions, 73 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index fb777948..0c64fcba 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -641,81 +641,37 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) | |||
641 | } | 641 | } |
642 | 642 | ||
643 | /* | 643 | /* |
644 | * Context state can be written directly or "patched" at times. | 644 | * Context state can be written directly, or "patched" at times. So that code |
645 | * So that code can be used in either situation it is written | 645 | * can be used in either situation it is written using a series of |
646 | * using a series _ctx_patch_write(..., patch) statements. | 646 | * _ctx_patch_write(..., patch) statements. However any necessary map overhead |
647 | * However any necessary cpu map/unmap and gpu l2 invalidates | 647 | * should be minimized; thus, bundle the sequence of these writes together, and |
648 | * should be minimized (to avoid doing it once per patch write). | 648 | * set them up and close with _ctx_patch_write_begin/_ctx_patch_write_end. |
649 | * Before a sequence of these set up with "_ctx_patch_write_begin" | ||
650 | * and close with "_ctx_patch_write_end." | ||
651 | */ | 649 | */ |
650 | |||
652 | int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, | 651 | int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, |
653 | struct channel_ctx_gk20a *ch_ctx) | 652 | struct channel_ctx_gk20a *ch_ctx) |
654 | { | 653 | { |
655 | /* being defensive still... */ | 654 | return gk20a_mem_begin(g, &ch_ctx->patch_ctx.mem); |
656 | if (WARN_ON(ch_ctx->patch_ctx.mem.cpu_va)) { | ||
657 | gk20a_err(dev_from_gk20a(g), "nested ctx patch begin?"); | ||
658 | return -EBUSY; | ||
659 | } | ||
660 | |||
661 | if (gk20a_mem_begin(g, &ch_ctx->patch_ctx.mem)) | ||
662 | return -ENOMEM; | ||
663 | |||
664 | return 0; | ||
665 | } | 655 | } |
666 | 656 | ||
667 | int gr_gk20a_ctx_patch_write_end(struct gk20a *g, | 657 | void gr_gk20a_ctx_patch_write_end(struct gk20a *g, |
668 | struct channel_ctx_gk20a *ch_ctx) | 658 | struct channel_ctx_gk20a *ch_ctx) |
669 | { | 659 | { |
670 | /* being defensive still... */ | ||
671 | if (!ch_ctx->patch_ctx.mem.cpu_va) { | ||
672 | gk20a_err(dev_from_gk20a(g), "dangling ctx patch end?"); | ||
673 | return -EINVAL; | ||
674 | } | ||
675 | |||
676 | gk20a_mem_end(g, &ch_ctx->patch_ctx.mem); | 660 | gk20a_mem_end(g, &ch_ctx->patch_ctx.mem); |
677 | return 0; | ||
678 | } | 661 | } |
679 | 662 | ||
680 | int gr_gk20a_ctx_patch_write(struct gk20a *g, | 663 | void gr_gk20a_ctx_patch_write(struct gk20a *g, |
681 | struct channel_ctx_gk20a *ch_ctx, | 664 | struct channel_ctx_gk20a *ch_ctx, |
682 | u32 addr, u32 data, bool patch) | 665 | u32 addr, u32 data, bool patch) |
683 | { | 666 | { |
684 | u32 patch_slot = 0; | ||
685 | bool mapped_here = false; | ||
686 | |||
687 | BUG_ON(patch != 0 && ch_ctx == NULL); | ||
688 | |||
689 | if (patch) { | 667 | if (patch) { |
690 | if (!ch_ctx) | 668 | u32 patch_slot = ch_ctx->patch_ctx.data_count * 2; |
691 | return -EINVAL; | 669 | gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot, addr); |
692 | /* we added an optimization prolog, epilog | 670 | gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot + 1, data); |
693 | * to get rid of unnecessary maps and l2 invals. | ||
694 | * but be defensive still... */ | ||
695 | if (!ch_ctx->patch_ctx.mem.cpu_va) { | ||
696 | int err; | ||
697 | gk20a_dbg_info("per-write ctx patch begin?"); | ||
698 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | ||
699 | if (err) | ||
700 | return err; | ||
701 | mapped_here = true; | ||
702 | } else | ||
703 | mapped_here = false; | ||
704 | |||
705 | patch_slot = ch_ctx->patch_ctx.data_count * 2; | ||
706 | |||
707 | gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, addr); | ||
708 | gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, data); | ||
709 | |||
710 | ch_ctx->patch_ctx.data_count++; | 671 | ch_ctx->patch_ctx.data_count++; |
711 | 672 | } else { | |
712 | if (mapped_here) | ||
713 | gr_gk20a_ctx_patch_write_end(g, ch_ctx); | ||
714 | |||
715 | } else | ||
716 | gk20a_writel(g, addr, data); | 673 | gk20a_writel(g, addr, data); |
717 | 674 | } | |
718 | return 0; | ||
719 | } | 675 | } |
720 | 676 | ||
721 | static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, | 677 | static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, |
@@ -3105,7 +3061,6 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | |||
3105 | 3061 | ||
3106 | /* tweak any perf parameters per-context here */ | 3062 | /* tweak any perf parameters per-context here */ |
3107 | if (args->class_num == KEPLER_COMPUTE_A) { | 3063 | if (args->class_num == KEPLER_COMPUTE_A) { |
3108 | int begin_err; | ||
3109 | u32 tex_lock_disable_mask; | 3064 | u32 tex_lock_disable_mask; |
3110 | u32 texlock; | 3065 | u32 texlock; |
3111 | u32 lockboost_mask; | 3066 | u32 lockboost_mask; |
@@ -3144,24 +3099,20 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | |||
3144 | lockboost = (lockboost & ~lockboost_mask) | | 3099 | lockboost = (lockboost & ~lockboost_mask) | |
3145 | gr_gpcs_tpcs_sm_sch_macro_sched_lockboost_size_f(0); | 3100 | gr_gpcs_tpcs_sm_sch_macro_sched_lockboost_size_f(0); |
3146 | 3101 | ||
3147 | begin_err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | 3102 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); |
3148 | 3103 | ||
3149 | if (!begin_err) { | 3104 | if (!err) { |
3150 | err = gr_gk20a_ctx_patch_write(g, ch_ctx, | 3105 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
3151 | gr_gpcs_tpcs_sm_sch_texlock_r(), | 3106 | gr_gpcs_tpcs_sm_sch_texlock_r(), |
3152 | texlock, true); | 3107 | texlock, true); |
3153 | 3108 | gr_gk20a_ctx_patch_write(g, ch_ctx, | |
3154 | if (!err) | 3109 | gr_gpcs_tpcs_sm_sch_macro_sched_r(), |
3155 | err = gr_gk20a_ctx_patch_write(g, ch_ctx, | 3110 | lockboost, true); |
3156 | gr_gpcs_tpcs_sm_sch_macro_sched_r(), | 3111 | gr_gk20a_ctx_patch_write_end(g, ch_ctx); |
3157 | lockboost, true); | 3112 | } else { |
3158 | } | ||
3159 | if ((begin_err || err)) { | ||
3160 | gk20a_err(dev_from_gk20a(g), | 3113 | gk20a_err(dev_from_gk20a(g), |
3161 | "failed to set texlock for compute class"); | 3114 | "failed to set texlock for compute class"); |
3162 | } | 3115 | } |
3163 | if (!begin_err) | ||
3164 | gr_gk20a_ctx_patch_write_end(g, ch_ctx); | ||
3165 | 3116 | ||
3166 | args->flags |= NVGPU_ALLOC_OBJ_FLAGS_LOCKBOOST_ZERO; | 3117 | args->flags |= NVGPU_ALLOC_OBJ_FLAGS_LOCKBOOST_ZERO; |
3167 | 3118 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 189994ef..ad6d8049 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -530,11 +530,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
530 | bool enable_hwpm_ctxsw); | 530 | bool enable_hwpm_ctxsw); |
531 | 531 | ||
532 | struct channel_ctx_gk20a; | 532 | struct channel_ctx_gk20a; |
533 | int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, | 533 | void gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, |
534 | u32 addr, u32 data, bool patch); | 534 | u32 addr, u32 data, bool patch); |
535 | int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, | 535 | int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, |
536 | struct channel_ctx_gk20a *ch_ctx); | 536 | struct channel_ctx_gk20a *ch_ctx); |
537 | int gr_gk20a_ctx_patch_write_end(struct gk20a *g, | 537 | void gr_gk20a_ctx_patch_write_end(struct gk20a *g, |
538 | struct channel_ctx_gk20a *ch_ctx); | 538 | struct channel_ctx_gk20a *ch_ctx); |
539 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, | 539 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, |
540 | struct channel_ctx_gk20a *ch_ctx, | 540 | struct channel_ctx_gk20a *ch_ctx, |