author     seshendra Gadagottu <sgadagottu@nvidia.com>          2017-09-18 14:39:32 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-09-19 20:45:29 -0400
commit     1132fd2a12a48271f94e995c0466c48b8228c185 (patch)
tree       eaa69417b39f228995dad657146776eec4884ee3 /drivers/gpu/nvgpu/gk20a
parent     c4370d7deff6e3545157e06f51df2fef605a18cc (diff)
gpu: nvgpu: changes related to handling ctx header
The ctx header holds only the gpu va for each address space. All other
information is held in the main context. The ctx header has a gpu va for
the following fields:
  ctxsw_prog_main_image_context_buffer_ptr
  ctxsw_prog_main_image_context_buffer_ptr_hi
  ctxsw_prog_main_image_zcull_ptr
  ctxsw_prog_main_image_zcull_ptr_hi
  ctxsw_prog_main_image_pm_ptr
  ctxsw_prog_main_image_pm_ptr_hi
  ctxsw_prog_main_image_full_preemption_ptr_hi
  ctxsw_prog_main_image_full_preemption_ptr
  ctxsw_prog_main_image_full_preemption_ptr_xxxx0
  ctxsw_prog_main_image_full_preemption_ptr_xxxx0_v
  ctxsw_prog_main_image_patch_adr_lo
  ctxsw_prog_main_image_patch_adr_hi

Changes done as part of this CL:
- Read ctx_id from the main context header.
- Golden context creation: use gold_mem for golden context creation and
  copy the golden context from the saved golden local memory to the main
  context. No need to restore the golden context to the context header.
- Write ctx_patch_count and smpc_ctxsw_mode in the main context header only.
- Update the preemption mode in the main context header and the preemption
  buffer va in the context header.
- Update the image patch buffer va in the context header.

Bug 1958308

Change-Id: Ic076aad8b1802f76f941d2d15cb9a8c07308e3e8
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1562680
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
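For illustration, below is a minimal, hedged C sketch of the split this CL enforces for the patch buffer: bookkeeping such as the patch count stays in the main gr_ctx image, while only the patch buffer gpu va is mirrored into the ctx header when one is mapped. The helper name sketch_write_patch_buffer() is hypothetical; the accessors (nvgpu_mem_wr(), u64_lo32()/u64_hi32(), ctxsw_prog_main_image_patch_*_o()) and the ch_ctx layout are taken from the diff below, and the caller is assumed to have already mapped the buffers with nvgpu_mem_begin().

/*
 * Hedged sketch (not the driver's actual function): after this CL, context
 * bookkeeping lives in the main gr_ctx image and the ctx header carries only
 * gpu virtual addresses.  Assumes the caller already did nvgpu_mem_begin()
 * on the buffers it touches, mirroring gr_gk20a_ctx_patch_smpc() below.
 */
static void sketch_write_patch_buffer(struct gk20a *g,
				      struct channel_ctx_gk20a *ch_ctx)
{
	struct nvgpu_mem *gr_mem = &ch_ctx->gr_ctx->mem;
	struct nvgpu_mem *ctxheader = &ch_ctx->ctx_header.mem;
	u64 vaddr = ch_ctx->patch_ctx.mem.gpu_va;

	/* The patch count is state, so it goes to the main context image. */
	nvgpu_mem_wr(g, gr_mem, ctxsw_prog_main_image_patch_count_o(),
		     ch_ctx->patch_ctx.data_count);

	/* The patch buffer gpu va goes to the ctx header when one is
	 * mapped, otherwise it falls back to the main context image. */
	if (ctxheader->gpu_va) {
		nvgpu_mem_wr(g, ctxheader,
			     ctxsw_prog_main_image_patch_adr_lo_o(),
			     u64_lo32(vaddr));
		nvgpu_mem_wr(g, ctxheader,
			     ctxsw_prog_main_image_patch_adr_hi_o(),
			     u64_hi32(vaddr));
	} else {
		nvgpu_mem_wr(g, gr_mem,
			     ctxsw_prog_main_image_patch_adr_lo_o(),
			     u64_lo32(vaddr));
		nvgpu_mem_wr(g, gr_mem,
			     ctxsw_prog_main_image_patch_adr_hi_o(),
			     u64_hi32(vaddr));
	}
}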
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c  122
1 file changed, 38 insertions, 84 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 82c9fa89..263d8bac 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -104,19 +104,12 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
 		u32 *ctx_id)
 {
 	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-	struct ctx_header_desc *ctx_header = &ch_ctx->ctx_header;
-	struct nvgpu_mem *ctx_header_mem = &ctx_header->mem;
-	struct nvgpu_mem *mem;
+	struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem;
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);
 
-	if (ctx_header_mem->gpu_va)
-		mem = ctx_header_mem;
-	else
-		mem = &ch_ctx->gr_ctx->mem;
-
 	if (nvgpu_mem_begin(g, mem))
 		return -ENOMEM;
 
@@ -681,20 +674,10 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx)
 {
-	struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
-	struct nvgpu_mem *ctxheader = &ctx->mem;
-
 	nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem);
 
 	/* Write context count to context image if it is mapped */
-	if (ctxheader->gpu_va) {
-
-		if (ctxheader->cpu_va)
-			nvgpu_mem_wr(g, ctxheader,
-				ctxsw_prog_main_image_patch_count_o(),
-				ch_ctx->patch_ctx.data_count);
-
-	} else if (ch_ctx->gr_ctx->mem.cpu_va) {
+	if (ch_ctx->gr_ctx->mem.cpu_va) {
 		nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem,
 			ctxsw_prog_main_image_patch_count_o(),
 			ch_ctx->patch_ctx.data_count);
@@ -1357,8 +1340,6 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
 	struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
 	u32 last_method_data = 0;
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
-	struct nvgpu_mem *ctxheader = &ctx->mem;
 
 	gk20a_dbg_fn("");
 
@@ -1549,14 +1530,7 @@ restore_fe_go_idle:
 	nvgpu_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
 		ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
 
-	if (nvgpu_mem_begin(g, ctxheader))
-		goto clean_up;
-
-	if (ctxheader->gpu_va)
-		g->ops.gr.write_zcull_ptr(g, ctxheader, 0);
-	else
-		g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
-	nvgpu_mem_end(g, ctxheader);
+	g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
 
 	err = g->ops.gr.commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
 	if (err)
@@ -1564,8 +1538,7 @@ restore_fe_go_idle:
 
 	gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v());
 
-	if (nvgpu_mem_begin(g, ctxheader))
-		goto clean_up;
+
 
 	if (gr->ctx_vars.local_golden_image == NULL) {
 
@@ -1576,17 +1549,11 @@ restore_fe_go_idle:
 			err = -ENOMEM;
 			goto clean_up;
 		}
+		nvgpu_mem_rd_n(g, gold_mem, 0,
+			gr->ctx_vars.local_golden_image,
+			gr->ctx_vars.golden_image_size);
 
-		if (ctxheader->gpu_va)
-			nvgpu_mem_rd_n(g, ctxheader, 0,
-				gr->ctx_vars.local_golden_image,
-				gr->ctx_vars.golden_image_size);
-		else
-			nvgpu_mem_rd_n(g, gold_mem, 0,
-				gr->ctx_vars.local_golden_image,
-				gr->ctx_vars.golden_image_size);
 	}
-	nvgpu_mem_end(g, ctxheader);
 
 	err = g->ops.gr.commit_inst(c, gr_mem->gpu_va);
 	if (err)
@@ -1618,8 +1585,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 	struct nvgpu_mem *mem;
 	u32 data;
 	int ret;
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
-	struct nvgpu_mem *ctxheader = &ctx->mem;
 
 	gk20a_dbg_fn("");
 
@@ -1651,34 +1616,17 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 		goto out;
 	}
 
-	if (nvgpu_mem_begin(g, ctxheader)) {
-		ret = -ENOMEM;
-		goto clean_up_mem;
-	}
-	if (ctxheader->gpu_va)
-		data = nvgpu_mem_rd(g, ctxheader,
-				ctxsw_prog_main_image_pm_o());
-	else
-		data = nvgpu_mem_rd(g, mem,
-				ctxsw_prog_main_image_pm_o());
+	data = nvgpu_mem_rd(g, mem,
+			ctxsw_prog_main_image_pm_o());
 
 	data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
 	data |= enable_smpc_ctxsw ?
 		ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
 		ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
 
-	if (ctxheader->gpu_va)
-		nvgpu_mem_wr(g, ctxheader,
-				ctxsw_prog_main_image_pm_o(),
-				data);
-	else
-		nvgpu_mem_wr(g, mem,
-				ctxsw_prog_main_image_pm_o(),
-				data);
-
-	nvgpu_mem_end(g, ctxheader);
+	nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_pm_o(), data);
 
-clean_up_mem:
 	nvgpu_mem_end(g, mem);
 out:
 	gk20a_enable_channel_tsg(g, c);
@@ -1862,14 +1810,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 		goto clean_up_mem;
 	}
 
-	if (ctxheader->gpu_va) {
-		if (g->ops.gr.restore_context_header)
-			g->ops.gr.restore_context_header(g, ctxheader);
-	} else {
-		nvgpu_mem_wr_n(g, mem, 0,
-			gr->ctx_vars.local_golden_image,
-			gr->ctx_vars.golden_image_size);
-	}
+	nvgpu_mem_wr_n(g, mem, 0,
+		gr->ctx_vars.local_golden_image,
+		gr->ctx_vars.golden_image_size);
 
 	if (g->ops.gr.init_ctxsw_hdr_data)
 		g->ops.gr.init_ctxsw_hdr_data(g, mem);
@@ -1923,19 +1866,19 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
 	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
 		ch_ctx->patch_ctx.data_count);
-	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_lo_o(),
-		virt_addr_lo);
-	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_hi_o(),
-		virt_addr_hi);
 
 	if (ctxheader->gpu_va) {
 		nvgpu_mem_wr(g, ctxheader,
-			ctxsw_prog_main_image_patch_count_o(),
-			ch_ctx->patch_ctx.data_count);
+			ctxsw_prog_main_image_patch_adr_lo_o(),
+			virt_addr_lo);
 		nvgpu_mem_wr(g, ctxheader,
+			ctxsw_prog_main_image_patch_adr_hi_o(),
+			virt_addr_hi);
+	} else {
+		nvgpu_mem_wr(g, mem,
 			ctxsw_prog_main_image_patch_adr_lo_o(),
 			virt_addr_lo);
-		nvgpu_mem_wr(g, ctxheader,
+		nvgpu_mem_wr(g, mem,
 			ctxsw_prog_main_image_patch_adr_hi_o(),
 			virt_addr_hi);
 	}
@@ -6606,6 +6549,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 	u32 *ovr_perf_regs = NULL;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
+	struct nvgpu_mem *ctxheader = &ctx->mem;
 
 	g->ops.gr.init_ovr_sm_dsm_perf();
 	g->ops.gr.init_sm_dsm_reg_info();
@@ -6640,12 +6585,21 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 		nvgpu_mem_wr(g, mem,
 			ctxsw_prog_main_image_patch_count_o(),
 			ch_ctx->patch_ctx.data_count);
-		nvgpu_mem_wr(g, mem,
-			ctxsw_prog_main_image_patch_adr_lo_o(),
-			vaddr_lo);
-		nvgpu_mem_wr(g, mem,
-			ctxsw_prog_main_image_patch_adr_hi_o(),
-			vaddr_hi);
+		if (ctxheader->gpu_va) {
+			nvgpu_mem_wr(g, ctxheader,
+				ctxsw_prog_main_image_patch_adr_lo_o(),
+				vaddr_lo);
+			nvgpu_mem_wr(g, ctxheader,
+				ctxsw_prog_main_image_patch_adr_hi_o(),
+				vaddr_hi);
+		} else {
+			nvgpu_mem_wr(g, mem,
+				ctxsw_prog_main_image_patch_adr_lo_o(),
+				vaddr_lo);
+			nvgpu_mem_wr(g, mem,
+				ctxsw_prog_main_image_patch_adr_hi_o(),
+				vaddr_hi);
+		}
 
 		/* we're not caching these on cpu side,
 		   but later watch for it */