From c9419732776a3f31b3c1ace0cd113151f3a4d7cd Mon Sep 17 00:00:00 2001
From: Peter Daifuku
Date: Wed, 8 Nov 2017 19:13:29 -0800
Subject: gpu: nvgpu: ctx_patch_write fixes

- Add an update_patch_count parameter to the ctx_patch_write_begin/end
  functions. If true, the main_image_patch_count register will be
  updated. Previously, the patch count would be updated only if the
  cpu_va for the graphics context was non-NULL, but that check works
  only for sysmem (cpu_va is always 0 for vidmem).

- Remove the unused patch parameter from the commit_global_timeslice
  functions.

JIRA ESRM-74
Bug 2012077

Change-Id: I35d0a9eb48669a227833bba1d2e63e9fe8fd8aa9
Signed-off-by: Peter Daifuku
Reviewed-on: https://git-master.nvidia.com/r/1594790
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/gk20a.h    |  2 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 60 ++++++++++++++++----------------------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h |  9 +++---
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c |  4 +--
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c |  8 ++---
 5 files changed, 37 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 2dbe8b02..4bc5c04b 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -401,7 +401,7 @@ struct gpu_ops {
 		void (*program_zcull_mapping)(struct gk20a *g, u32 zcull_alloc_num,
 				u32 *zcull_map_tiles);
 		int (*commit_global_timeslice)(struct gk20a *g,
-				struct channel_gk20a *c, bool patch);
+				struct channel_gk20a *c);
 		int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
 		void (*write_zcull_ptr)(struct gk20a *g,
 				struct nvgpu_mem *mem, u64 gpu_va);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index ddce0716..5c13fec2 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -673,7 +673,8 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
  */
 int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
-					struct channel_ctx_gk20a *ch_ctx)
+					struct channel_ctx_gk20a *ch_ctx,
+					bool update_patch_count)
 {
 	int err = 0;
 
@@ -681,7 +682,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 	if (err)
 		return err;
 
-	if (ch_ctx->gr_ctx->mem.cpu_va) {
+	if (update_patch_count) {
 		/* reset patch count if ucode has already processed it */
 		ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
 					&ch_ctx->gr_ctx->mem,
@@ -693,12 +694,13 @@
 }
 
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
-					struct channel_ctx_gk20a *ch_ctx)
+					struct channel_ctx_gk20a *ch_ctx,
+					bool update_patch_count)
 {
 	nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem);
 
 	/* Write context count to context image if it is mapped */
-	if (ch_ctx->gr_ctx->mem.cpu_va) {
+	if (update_patch_count) {
 		nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem,
 			ctxsw_prog_main_image_patch_count_o(),
 			ch_ctx->patch_ctx.data_count);
@@ -876,7 +878,7 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 	gk20a_dbg_fn("");
 	if (patch) {
 		int err;
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 		if (err)
 			return err;
 	}
@@ -922,13 +924,12 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 	g->ops.gr.commit_global_cb_manager(g, c, patch);
 
 	if (patch)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 
 	return 0;
 }
 
-int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c,
-					bool patch)
+int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
 {
 	struct gr_gk20a *gr = &g->gr;
 	struct channel_ctx_gk20a *ch_ctx = NULL;
@@ -946,14 +947,6 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c,
 	ds_debug = gk20a_readl(g, gr_ds_debug_r());
 	mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());
 
-	if (patch) {
-		int err;
-		ch_ctx = &c->ch_ctx;
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
-		if (err)
-			return err;
-	}
-
 	if (gr->timeslice_mode == gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v()) {
 		pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
 		pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());
@@ -965,27 +958,24 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c,
 		ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
 		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;
 
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
 	} else {
 		gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
 		pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
 		ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
 		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;
 
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
 	}
 
-	if (patch)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
-
 	return 0;
 }
@@ -1489,7 +1479,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 		goto clean_up;
 
 	/* override a few ctx state registers */
-	g->ops.gr.commit_global_timeslice(g, c, false);
+	g->ops.gr.commit_global_timeslice(g, c);
 
 	/* floorsweep anything left */
 	err = g->ops.gr.init_fs_state(g);
@@ -3031,7 +3021,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
 		lockboost = (lockboost & ~lockboost_mask) |
 			gr_gpcs_tpcs_sm_sch_macro_sched_lockboost_size_f(0);
 
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 
 		if (!err) {
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
@@ -3040,7 +3030,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpcs_tpcs_sm_sch_macro_sched_r(),
 				lockboost, true);
-			gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+			gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 		} else {
 			nvgpu_err(g,
 				"failed to set texlock for compute class");
@@ -4528,7 +4518,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 		 gr_fe_go_idle_timeout_count_disabled_f());
 
 	/* override a few ctx state registers */
-	g->ops.gr.commit_global_timeslice(g, NULL, false);
+	g->ops.gr.commit_global_timeslice(g, NULL);
 
 	/* floorsweep anything left */
 	err = g->ops.gr.init_fs_state(g);
@@ -7851,7 +7841,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	}
 
 	offset_addrs = offsets + max_offsets;
 
-	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 	if (err)
 		goto cleanup;
@@ -7989,7 +7979,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	nvgpu_kfree(g, offsets);
 
 	if (ch_ctx->patch_ctx.mem.cpu_va)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, gr_ctx_ready);
 	if (gr_ctx_ready)
 		nvgpu_mem_end(g, &ch_ctx->gr_ctx->mem);
 	if (pm_ctx_ready)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 2b3b74bc..b437838f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -619,9 +619,11 @@ struct channel_ctx_gk20a;
 void gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx,
 		u32 addr, u32 data, bool patch);
 int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx);
+		struct channel_ctx_gk20a *ch_ctx,
+		bool update_patch_count);
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx);
+		struct channel_ctx_gk20a *ch_ctx,
+		bool update_patch_count);
 void gr_gk20a_commit_global_pagepool(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx,
 		u64 addr, u32 size, bool patch);
@@ -745,8 +747,7 @@ int gr_gk20a_resume_from_pause(struct gk20a *g);
 int gr_gk20a_clear_sm_errors(struct gk20a *g);
 u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);
 
-int gr_gk20a_commit_global_timeslice(struct gk20a *g,
-		struct channel_gk20a *c, bool patch);
+int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c);
 
 void gr_gk20a_init_sm_id_table(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 4c49f734..c692d975 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1339,7 +1339,7 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,
 		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
 				gr->sm_error_states[sm_id].hww_warp_esr_report_mask);
 	} else {
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 		if (err)
 			goto enable_ctxsw;
@@ -1352,7 +1352,7 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,
 				gr->sm_error_states[sm_id].hww_warp_esr_report_mask,
 				true);
 
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 	}
 
 enable_ctxsw:
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 24b22a7d..c9b2f859 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1224,7 +1224,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 			gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va);
 	}
 
-	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
 	if (err) {
 		nvgpu_err(g, "can't map patch context");
 		goto out;
@@ -1277,7 +1277,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 				cbes_reserve),
 				true);
 
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
 	}
 
 out:
@@ -2254,13 +2254,13 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 		g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem);
 
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
 		if (err) {
 			nvgpu_err(g, "can't map patch context");
 			goto enable_ch;
 		}
 		g->ops.gr.commit_global_cb_manager(g, ch, true);
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
 	}
 
 enable_ch:
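Editor's note (not part of the patch): the core of this fix is replacing an
inferred predicate (gr_ctx->mem.cpu_va != 0) with an explicit caller-intent
flag. The standalone C sketch below models only that predicate change;
struct mock_gr_ctx and both helper functions are hypothetical stand-ins,
not nvgpu code.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the graphics context image; in nvgpu,
     * cpu_va is 0 whenever the image is vidmem-backed. */
    struct mock_gr_ctx {
            uintptr_t cpu_va;
    };

    /* Old heuristic: write back the patch count only when the image has
     * a CPU mapping -- always false for vidmem, so the count update was
     * silently skipped there. */
    static bool old_should_update_count(const struct mock_gr_ctx *ctx)
    {
            return ctx->cpu_va != 0;
    }

    /* New contract: the caller states explicitly whether the
     * main_image_patch_count register must be updated. */
    static bool new_should_update_count(bool update_patch_count)
    {
            return update_patch_count;
    }

    int main(void)
    {
            struct mock_gr_ctx sysmem = { .cpu_va = 0x1000 };
            struct mock_gr_ctx vidmem = { .cpu_va = 0 };

            /* A caller that needs the count updated passes true
             * regardless of where the context image lives. */
            printf("sysmem: old=%d new=%d\n",
                   (int)old_should_update_count(&sysmem),
                   (int)new_should_update_count(true));
            printf("vidmem: old=%d new=%d\n",
                   (int)old_should_update_count(&vidmem),
                   (int)new_should_update_count(true));
            return 0;
    }

Expected output is "sysmem: old=1 new=1" and "vidmem: old=0 new=1": the old
check drops the update for vidmem-backed contexts, the explicit flag does
not. This mirrors the call sites above: the gp10b preemption paths now pass
true, while __gr_gk20a_exec_ctx_ops passes gr_ctx_ready to preserve its
mapping-dependent behavior.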