 drivers/gpu/nvgpu/gk20a/gk20a.h    |  2 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 60 +++++++++++++++++++++++++++++-----------------------------------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h |  9 +++++----
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c |  4 ++--
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c |  8 ++++----
 5 files changed, 37 insertions(+), 46 deletions(-)
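
The change below makes the patch-count bookkeeping explicit: gr_gk20a_ctx_patch_write_begin()/gr_gk20a_ctx_patch_write_end() gain an update_patch_count argument that replaces the implicit ch_ctx->gr_ctx->mem.cpu_va check, so each call site states whether the patch count stored in the gr context image should be refreshed, and commit_global_timeslice() loses its always-false patch parameter. A minimal sketch of the resulting calling convention, assuming only the signatures visible in the hunks below (commit_regs() is a hypothetical caller, not part of this patch):

	static int commit_regs(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx)
	{
		int err;

		/* false: leave the patch count in the gr context image
		 * untouched (the image may not be CPU-mapped here) */
		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
		if (err)
			return err;

		/* final argument true: buffer the write in the patch
		 * context rather than writing the register directly */
		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(),
					 gr_ds_debug_timeslice_mode_enable_f(),
					 true);

		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
		return 0;
	}
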
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 2dbe8b02..4bc5c04b 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -401,7 +401,7 @@ struct gpu_ops {
 		void (*program_zcull_mapping)(struct gk20a *g,
 				u32 zcull_alloc_num, u32 *zcull_map_tiles);
 		int (*commit_global_timeslice)(struct gk20a *g,
-				struct channel_gk20a *c, bool patch);
+				struct channel_gk20a *c);
 		int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
 		void (*write_zcull_ptr)(struct gk20a *g,
 				struct nvgpu_mem *mem, u64 gpu_va);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index ddce0716..5c13fec2 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -673,7 +673,8 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
  */
 
 int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
-				   struct channel_ctx_gk20a *ch_ctx)
+				   struct channel_ctx_gk20a *ch_ctx,
+				   bool update_patch_count)
 {
 	int err = 0;
 
@@ -681,7 +682,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 	if (err)
 		return err;
 
-	if (ch_ctx->gr_ctx->mem.cpu_va) {
+	if (update_patch_count) {
 		/* reset patch count if ucode has already processed it */
 		ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
 					&ch_ctx->gr_ctx->mem,
@@ -693,12 +694,13 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 }
 
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
-				  struct channel_ctx_gk20a *ch_ctx)
+				  struct channel_ctx_gk20a *ch_ctx,
+				  bool update_patch_count)
 {
 	nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem);
 
 	/* Write context count to context image if it is mapped */
-	if (ch_ctx->gr_ctx->mem.cpu_va) {
+	if (update_patch_count) {
 		nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem,
 			ctxsw_prog_main_image_patch_count_o(),
 			ch_ctx->patch_ctx.data_count);
@@ -876,7 +878,7 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 	gk20a_dbg_fn("");
 	if (patch) {
 		int err;
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 		if (err)
 			return err;
 	}
@@ -922,13 +924,12 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 	g->ops.gr.commit_global_cb_manager(g, c, patch);
 
 	if (patch)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 
 	return 0;
 }
 
-int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c,
-			bool patch)
+int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
 {
 	struct gr_gk20a *gr = &g->gr;
 	struct channel_ctx_gk20a *ch_ctx = NULL;
@@ -946,14 +947,6 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c,
 	ds_debug = gk20a_readl(g, gr_ds_debug_r());
 	mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());
 
-	if (patch) {
-		int err;
-		ch_ctx = &c->ch_ctx;
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
-		if (err)
-			return err;
-	}
-
 	if (gr->timeslice_mode == gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v()) {
 		pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
 		pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());
@@ -965,27 +958,24 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c,
 		ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
 		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;
 
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
 	} else {
 		gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
 		pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
 		ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
 		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;
 
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
 	}
 
-	if (patch)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
-
 	return 0;
 }
 
@@ -1489,7 +1479,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 		goto clean_up;
 
 	/* override a few ctx state registers */
-	g->ops.gr.commit_global_timeslice(g, c, false);
+	g->ops.gr.commit_global_timeslice(g, c);
 
 	/* floorsweep anything left */
 	err = g->ops.gr.init_fs_state(g);
@@ -3031,7 +3021,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
 		lockboost = (lockboost & ~lockboost_mask) |
 			gr_gpcs_tpcs_sm_sch_macro_sched_lockboost_size_f(0);
 
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 
 		if (!err) {
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
@@ -3040,7 +3030,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpcs_tpcs_sm_sch_macro_sched_r(),
 				lockboost, true);
-			gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+			gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 		} else {
 			nvgpu_err(g,
 				"failed to set texlock for compute class");
@@ -4528,7 +4518,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 		 gr_fe_go_idle_timeout_count_disabled_f());
 
 	/* override a few ctx state registers */
-	g->ops.gr.commit_global_timeslice(g, NULL, false);
+	g->ops.gr.commit_global_timeslice(g, NULL);
 
 	/* floorsweep anything left */
 	err = g->ops.gr.init_fs_state(g);
@@ -7851,7 +7841,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	}
 	offset_addrs = offsets + max_offsets;
 
-	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 	if (err)
 		goto cleanup;
 
@@ -7989,7 +7979,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	nvgpu_kfree(g, offsets);
 
 	if (ch_ctx->patch_ctx.mem.cpu_va)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, gr_ctx_ready);
 	if (gr_ctx_ready)
 		nvgpu_mem_end(g, &ch_ctx->gr_ctx->mem);
 	if (pm_ctx_ready)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 2b3b74bc..b437838f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -619,9 +619,11 @@ struct channel_ctx_gk20a;
 void gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx,
 				    u32 addr, u32 data, bool patch);
 int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
-					  struct channel_ctx_gk20a *ch_ctx);
+					  struct channel_ctx_gk20a *ch_ctx,
+					  bool update_patch_count);
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
-					struct channel_ctx_gk20a *ch_ctx);
+					struct channel_ctx_gk20a *ch_ctx,
+					bool update_patch_count);
 void gr_gk20a_commit_global_pagepool(struct gk20a *g,
 				     struct channel_ctx_gk20a *ch_ctx,
 				     u64 addr, u32 size, bool patch);
@@ -745,8 +747,7 @@ int gr_gk20a_resume_from_pause(struct gk20a *g);
 int gr_gk20a_clear_sm_errors(struct gk20a *g);
 u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);
 
-int gr_gk20a_commit_global_timeslice(struct gk20a *g,
-		struct channel_gk20a *c, bool patch);
+int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c);
 
 void gr_gk20a_init_sm_id_table(struct gk20a *g);
 
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 4c49f734..c692d975 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1339,7 +1339,7 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,
 		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
 				gr->sm_error_states[sm_id].hww_warp_esr_report_mask);
 	} else {
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 		if (err)
 			goto enable_ctxsw;
 
@@ -1352,7 +1352,7 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,
 				gr->sm_error_states[sm_id].hww_warp_esr_report_mask,
 				true);
 
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 	}
 
 enable_ctxsw:
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 24b22a7d..c9b2f859 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1224,7 +1224,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 			gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va);
 	}
 
-	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
 	if (err) {
 		nvgpu_err(g, "can't map patch context");
 		goto out;
@@ -1277,7 +1277,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 					cbes_reserve),
 				true);
 
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
 	}
 
 out:
@@ -2254,13 +2254,13 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 		g->ops.gr.update_ctxsw_preemption_mode(ch->g,
 				ch_ctx, mem);
 
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
 		if (err) {
 			nvgpu_err(g, "can't map patch context");
 			goto enable_ch;
 		}
 		g->ops.gr.commit_global_cb_manager(g, ch, true);
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
 	}
 
 enable_ch:
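
Across the call sites, the new flag tracks whether the gr context image is CPU-mapped when the patch sequence runs: the gp10b preemption paths pass true, the gk20a and gm20b paths pass false, and __gr_gk20a_exec_ctx_ops() forwards gr_ctx_ready. A sketch of the gp10b-style sequence under that assumption (commit_cb_state() is a hypothetical wrapper, not part of this patch):

	static int commit_cb_state(struct gk20a *g, struct channel_gk20a *ch,
				   struct channel_ctx_gk20a *ch_ctx)
	{
		int err;

		/* true: the gr context image is mapped here, so re-read
		 * the patch count on begin and write it back on end */
		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
		if (err) {
			nvgpu_err(g, "can't map patch context");
			return err;
		}
		g->ops.gr.commit_global_cb_manager(g, ch, true);
		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
		return 0;
	}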