path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
author     seshendra Gadagottu <sgadagottu@nvidia.com>	2016-11-16 13:05:23 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>	2016-12-20 18:15:13 -0500
commit     d301c02246b95214b13ee7ac8eeceb34acd0899a (patch)
tree       734e2cc0976f24c29f2e5fa2a322969145db9e12 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent     92fe0007496fd42983a6849b4f8dd5bc7d124834 (diff)
gpu: nvgpu: copy data into channel context header
If the channel context has a separate context header, copy the required
info into that context header instead of the main context image.

JIRA GV11B-21

Change-Id: I5e0bdde132fb83956fd6ac473148ad4de498e830
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1229243
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.c	122
1 file changed, 112 insertions(+), 10 deletions(-)
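The patch applies one pattern at every write site in this file: ctxsw fields that FECS reads from the context header (zcull pointer, PM pointer, patch list, priv access map) are written into the channel's separate context header when one is mapped (ctxheader->gpu_va != 0), and into the main context image otherwise. The following is a minimal, self-contained sketch of that selection logic only, not the nvgpu API: toy_mem_desc, ctx_wr() and the word offsets below are stand-ins for the real mem_desc, gk20a_mem_wr() and ctxsw_prog_main_image_*_o() helpers.

/*
 * Toy model of "prefer the separate context header when present".
 * All names here are illustrative stand-ins, not the driver's API.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_mem_desc {
	uint64_t gpu_va;     /* 0 means "no separate context header mapped" */
	uint32_t words[16];  /* stand-in for the ctxsw image backing store */
};

/* Write one 32-bit ctxsw field, preferring the context header if it exists. */
static void ctx_wr(struct toy_mem_desc *main_img, struct toy_mem_desc *ctxheader,
		   unsigned int word_offset, uint32_t value)
{
	struct toy_mem_desc *dst = ctxheader->gpu_va ? ctxheader : main_img;

	dst->words[word_offset] = value;
}

int main(void)
{
	struct toy_mem_desc main_img = { .gpu_va = 0x1000 };
	struct toy_mem_desc hdr_absent = { .gpu_va = 0 };
	struct toy_mem_desc hdr_present = { .gpu_va = 0x2000 };

	/* No separate header: the field lands in the main context image. */
	ctx_wr(&main_img, &hdr_absent, 3, 0xcafe);

	/* Separate header mapped: the field lands in the header instead. */
	ctx_wr(&main_img, &hdr_present, 3, 0xbeef);

	printf("main image word 3 = 0x%x, ctx header word 3 = 0x%x\n",
	       (unsigned)main_img.words[3], (unsigned)hdr_present.words[3]);
	return 0;
}

Compiled as an ordinary userspace program, the first call lands in the main image and the second in the header, mirroring chips without and with a separate context header.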
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index b298c4c6..bf279e9a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -731,6 +731,8 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
 {
 	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
 	struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
+	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct mem_desc *ctxheader = &ctx->mem;
 	u32 va_lo, va_hi, va;
 	int ret = 0;
 
@@ -739,6 +741,11 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
 	if (gk20a_mem_begin(g, mem))
 		return -ENOMEM;
 
+	if (gk20a_mem_begin(g, ctxheader)) {
+		ret = -ENOMEM;
+		goto clean_up_mem;
+	}
+
 	if (ch_ctx->zcull_ctx.gpu_va == 0 &&
 	    ch_ctx->zcull_ctx.ctx_sw_mode ==
 	    ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
@@ -766,12 +773,18 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
 			ctxsw_prog_main_image_zcull_o(),
 			ch_ctx->zcull_ctx.ctx_sw_mode);
 
-	gk20a_mem_wr(g, mem,
+	if (ctxheader->gpu_va)
+		gk20a_mem_wr(g, ctxheader,
+			ctxsw_prog_main_image_zcull_ptr_o(), va);
+	else
+		gk20a_mem_wr(g, mem,
 			ctxsw_prog_main_image_zcull_ptr_o(), va);
 
 	gk20a_enable_channel_tsg(g, c);
 
 clean_up:
+	gk20a_mem_end(g, ctxheader);
+clean_up_mem:
 	gk20a_mem_end(g, mem);
 
 	return ret;
@@ -1476,11 +1489,14 @@ static u32 gk20a_init_sw_bundle(struct gk20a *g)
 	}
 	if (g->ops.gr.init_sw_veid_bundle)
 		g->ops.gr.init_sw_veid_bundle(g);
-
 	/* disable pipe mode override */
 	gk20a_writel(g, gr_pipe_bundle_config_r(),
 		gr_pipe_bundle_config_override_pipe_mode_disabled_f());
 
+	err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
+	if (err)
+		return err;
+
 	/* restore fe_go_idle */
 	gk20a_writel(g, gr_fe_go_idle_timeout_r(),
 		gr_fe_go_idle_timeout_count_prod_f());
@@ -1509,6 +1525,8 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	u32 last_method_data = 0;
 	int retries = FE_PWR_MODE_TIMEOUT_MAX / FE_PWR_MODE_TIMEOUT_DEFAULT;
 	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct mem_desc *ctxheader = &ctx->mem;
 
 	gk20a_dbg_fn("");
 
@@ -1517,9 +1535,20 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	   channels from initializing golden ctx at the same time */
 	mutex_lock(&gr->ctx_mutex);
 
-	if (gr->ctx_vars.golden_image_initialized)
-		goto clean_up;
+	if (gr->ctx_vars.golden_image_initialized) {
+		if (gk20a_mem_begin(g, ctxheader))
+			return -ENOMEM;
+		if (ctxheader->gpu_va) {
+			err = gr_gk20a_fecs_ctx_bind_channel(g, c);
+			if (err)
+				goto clean_up;
 
+			err = gr_gk20a_wait_idle(g, end_jiffies,
+					GR_IDLE_CHECK_DEFAULT);
+		}
+		gk20a_mem_end(g, ctxheader);
+		goto clean_up;
+	}
 	if (!platform->is_fmodel) {
 		gk20a_writel(g, gr_fe_pwr_mode_r(),
 			gr_fe_pwr_mode_req_send_f() | gr_fe_pwr_mode_mode_force_on_f());
@@ -1792,6 +1821,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
 	struct mem_desc *gr_mem;
 	u32 data, virt_addr;
+	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct mem_desc *ctxheader = &ctx->mem;
 	int ret;
 
 	gk20a_dbg_fn("");
@@ -1874,6 +1905,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 		goto cleanup_pm_buf;
 	}
 
+	if (gk20a_mem_begin(g, ctxheader)) {
+		ret = -ENOMEM;
+		goto clean_up_mem;
+	}
+
 	data = gk20a_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
 	data = data & ~ctxsw_prog_main_image_pm_mode_m();
 
@@ -1892,14 +1928,22 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 	data |= pm_ctx->pm_mode;
 
 	gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data);
-	gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
+	if (ctxheader->gpu_va)
+		gk20a_mem_wr(g, ctxheader,
+			ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
+	else
+		gk20a_mem_wr(g, gr_mem,
+			ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
 
+	gk20a_mem_end(g, ctxheader);
 	gk20a_mem_end(g, gr_mem);
 
 	/* enable channel */
 	gk20a_enable_channel_tsg(g, c);
 
 	return 0;
+clean_up_mem:
+	gk20a_mem_end(g, gr_mem);
 cleanup_pm_buf:
 	gk20a_gmmu_unmap(c->vm, pm_ctx->mem.gpu_va, pm_ctx->mem.size,
 		gk20a_mem_flag_none);
@@ -1923,12 +1967,16 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	int ret = 0;
 	struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
 	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct mem_desc *ctxheader = &ctx->mem;
+	u32 va_lo, va_hi, va;
 
 	gk20a_dbg_fn("");
 
 	if (gr->ctx_vars.local_golden_image == NULL)
 		return -1;
 
+
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);
@@ -1936,6 +1984,11 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	if (gk20a_mem_begin(g, mem))
 		return -ENOMEM;
 
+	if (gk20a_mem_begin(g, ctxheader)) {
+		ret = -ENOMEM;
+		goto clean_up_mem;
+	}
+
 	gk20a_mem_wr_n(g, mem, 0,
 		gr->ctx_vars.local_golden_image,
 		gr->ctx_vars.golden_image_size);
@@ -1945,7 +1998,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
 	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_save_ops_o(), 0);
 	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_restore_ops_o(), 0);
-
 	/* set priv access map */
 	virt_addr_lo =
 		u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
@@ -1959,10 +2011,22 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
 	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(),
 		data);
-	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
-		virt_addr_lo);
-	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
-		virt_addr_hi);
+
+	if (ctxheader->gpu_va) {
+		gk20a_mem_wr(g, ctxheader,
+			ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
+			virt_addr_lo);
+		gk20a_mem_wr(g, ctxheader,
+			ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
+			virt_addr_hi);
+	} else {
+		gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
+			virt_addr_lo);
+		gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
+			virt_addr_hi);
+	}
 	/* disable verif features */
 	v = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o());
 	v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
@@ -1982,6 +2046,32 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_hi_o(),
 		virt_addr_hi);
 
+	if (ctxheader->gpu_va) {
+		gk20a_mem_wr(g, ctxheader,
+			ctxsw_prog_main_image_patch_count_o(),
+			ch_ctx->patch_ctx.data_count);
+		gk20a_mem_wr(g, ctxheader,
+			ctxsw_prog_main_image_patch_adr_lo_o(),
+			virt_addr_lo);
+		gk20a_mem_wr(g, ctxheader,
+			ctxsw_prog_main_image_patch_adr_hi_o(),
+			virt_addr_hi);
+	}
+
+	va_lo = u64_lo32(ch_ctx->zcull_ctx.gpu_va);
+	va_hi = u64_hi32(ch_ctx->zcull_ctx.gpu_va);
+	va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);
+
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_zcull_o(),
+		ch_ctx->zcull_ctx.ctx_sw_mode);
+
+	if (ctxheader->gpu_va)
+		gk20a_mem_wr(g, ctxheader,
+			ctxsw_prog_main_image_zcull_ptr_o(), va);
+	else
+		gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_zcull_ptr_o(), va);
+
 	/* Update main header region of the context buffer with the info needed
 	 * for PM context switching, including mode and possibly a pointer to
 	 * the PM backing store.
@@ -2008,9 +2098,18 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
 	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
 
+	if (ctxheader->gpu_va) {
+		gk20a_mem_wr(g, ctxheader,
+			ctxsw_prog_main_image_pm_o(), data);
+		gk20a_mem_wr(g, ctxheader,
+			ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
+	}
+
 	gk20a_mem_end(g, mem);
+	gk20a_mem_end(g, ctxheader);
 
 	if (platform->is_fmodel) {
+
 		u32 mdata = fecs_current_ctx_data(g, &c->inst_block);
 
 		ret = gr_gk20a_submit_fecs_method_op(g,
@@ -2031,6 +2130,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
2031 "restore context image failed"); 2130 "restore context image failed");
2032 } 2131 }
2033 2132
2133clean_up_mem:
2134 gk20a_mem_end(g, mem);
2135
2034 return ret; 2136 return ret;
2035} 2137}
2036 2138