2 files changed, 24 insertions, 20 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 1dd42dc2..18e99d6f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -332,6 +332,7 @@ struct gpu_ops {
                int (*commit_global_timeslice)(struct gk20a *g,
                                        struct channel_gk20a *c, bool patch);
                int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
+                void (*restore_context_header)(struct gk20a *g, struct mem_desc *ctxheader);
        } gr;
        const char *name;
        struct {
@@ -420,6 +421,7 @@ struct gpu_ops {
                                        u32 *runlist);
                u32 (*userd_gp_get)(struct gk20a *g, struct channel_gk20a *ch);
                void (*userd_gp_put)(struct gk20a *g, struct channel_gk20a *ch);
+                void (*free_channel_ctx_header)(struct channel_gk20a *ch);
        } fifo;
        struct pmu_v {
                /*used for change of enum zbc update cmd id from ver 0 to ver1*/
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index be8894d2..48b825a1 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1571,18 +1571,6 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
        mutex_lock(&gr->ctx_mutex);
        if (gr->ctx_vars.golden_image_initialized) {
-                if (gk20a_mem_begin(g, ctxheader))
-                        return -ENOMEM;
-                if (ctxheader->gpu_va) {
-                        err = gr_gk20a_fecs_ctx_bind_channel(g, c);
-                        if (err)
-                                goto clean_up;
-                        err = gr_gk20a_wait_idle(g,
-                                                 gk20a_get_gr_idle_timeout(g),
-                                                 GR_IDLE_CHECK_DEFAULT);
-                }
-                gk20a_mem_end(g, ctxheader);
                goto clean_up;
        }
        if (!platform->is_fmodel) {
@@ -1753,7 +1741,6 @@ restore_fe_go_idle:
                data = gk20a_mem_rd32(g, gr_mem, i);
                gk20a_mem_wr32(g, gold_mem, i, data);
        }
        gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
                 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
@@ -1763,6 +1750,9 @@ restore_fe_go_idle:
        gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v());
+        if (gk20a_mem_begin(g, ctxheader))
+                goto clean_up;
        if (gr->ctx_vars.local_golden_image == NULL) {
                gr->ctx_vars.local_golden_image =
@@ -1773,10 +1763,16 @@ restore_fe_go_idle:
                        goto clean_up;
                }
-                gk20a_mem_rd_n(g, gold_mem, 0,
+                if (ctxheader->gpu_va)
+                        gk20a_mem_rd_n(g, ctxheader, 0,
+                                gr->ctx_vars.local_golden_image,
+                                gr->ctx_vars.golden_image_size);
+                else
+                        gk20a_mem_rd_n(g, gold_mem, 0,
                                gr->ctx_vars.local_golden_image,
                                gr->ctx_vars.golden_image_size);
        }
+        gk20a_mem_end(g, ctxheader);
        g->ops.gr.commit_inst(c, gr_mem->gpu_va);
@@ -2016,7 +2012,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
        if (gr->ctx_vars.local_golden_image == NULL)
                return -1;
        /* Channel gr_ctx buffer is gpu cacheable.
           Flush and invalidate before cpu update. */
        g->ops.mm.l2_flush(g, true);
@@ -2029,15 +2024,22 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
                goto clean_up_mem;
        }
-        gk20a_mem_wr_n(g, mem, 0,
+        if (ctxheader->gpu_va) {
+                if (g->ops.gr.restore_context_header)
+                        g->ops.gr.restore_context_header(g, ctxheader);
+        } else {
+                gk20a_mem_wr_n(g, mem, 0,
                        gr->ctx_vars.local_golden_image,
                        gr->ctx_vars.golden_image_size);
+                gk20a_mem_wr(g, mem,
+                        ctxsw_prog_main_image_num_save_ops_o(), 0);
+                gk20a_mem_wr(g, mem,
+                        ctxsw_prog_main_image_num_restore_ops_o(), 0);
+        }
        if (g->ops.gr.enable_cde_in_fecs && c->cde)
                g->ops.gr.enable_cde_in_fecs(g, mem);
-        gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_save_ops_o(), 0);
-        gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_restore_ops_o(), 0);
        /* set priv access map */
        virt_addr_lo =
                 u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
@@ -2145,9 +2147,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
                        ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
        }
-        gk20a_mem_end(g, mem);
        gk20a_mem_end(g, ctxheader);
 clean_up_mem:
        gk20a_mem_end(g, mem);
@@ -3065,6 +3065,8 @@ static void gr_gk20a_free_channel_pm_ctx(struct channel_gk20a *c)
 void gk20a_free_channel_ctx(struct channel_gk20a *c)
 {
+        if(c->g->ops.fifo.free_channel_ctx_header)
+                c->g->ops.fifo.free_channel_ctx_header(c);
        gr_gk20a_unmap_global_ctx_buffers(c);
        gr_gk20a_free_channel_patch_ctx(c);
        gr_gk20a_free_channel_pm_ctx(c);