gpu: nvgpu: update zcull and pm context pointers

Update zcull and perfmon buffer pointers in context header
through function pointers.

JIRA GV11B-48

Change-Id: Iaa6dd065128cb0c39e308cecf17b9d68a826d865
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1291850
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: seshendra Gadagottu <sgadagottu@nvidia.com> 2017-01-20 19:13:02 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-01-27 15:23:01 -0500
commit: 88ce7a98c8321747154020c4f173cfe05a3b1149 (patch)
tree: afb1007b673f10de589e47670a8626dc7c44d694 /drivers/gpu
parent: fa3f8cc10186ab18952433f1838fb657f220adc5 (diff)
4 files changed, 62 insertions, 40 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index ff354bc8..f7ceaced 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -333,6 +333,10 @@ struct gpu_ops {
                                        struct channel_gk20a *c, bool patch);
                int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
                void (*restore_context_header)(struct gk20a *g, struct mem_desc *ctxheader);
+                void (*write_zcull_ptr)(struct gk20a *g,
+                                        struct mem_desc *mem, u64 gpu_va);
+                void (*write_pm_ptr)(struct gk20a *g,
+                                        struct mem_desc *mem, u64 gpu_va);
        } gr;
        const char *name;
        struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 48b825a1..c461a9e1 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -742,13 +742,30 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
        return ret;
 }
+void gr_gk20a_write_zcull_ptr(struct gk20a *g,
+                                struct mem_desc *mem, u64 gpu_va)
+{
+        u32 va = u64_lo32(gpu_va >> 8);
+        gk20a_mem_wr(g, mem,
+                ctxsw_prog_main_image_zcull_ptr_o(), va);
+}
+void gr_gk20a_write_pm_ptr(struct gk20a *g,
+                                struct mem_desc *mem, u64 gpu_va)
+{
+        u32 va = u64_lo32(gpu_va >> 8);
+        gk20a_mem_wr(g, mem,
+                ctxsw_prog_main_image_pm_ptr_o(), va);
+}
 static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
 {
        struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
        struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
        struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
        struct mem_desc *ctxheader = &ctx->mem;
-        u32 va_lo, va_hi, va;
        int ret = 0;
        gk20a_dbg_fn("");
@@ -768,10 +785,6 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
                goto clean_up;
        }
-        va_lo = u64_lo32(ch_ctx->zcull_ctx.gpu_va);
-        va_hi = u64_hi32(ch_ctx->zcull_ctx.gpu_va);
-        va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);
        ret = gk20a_disable_channel_tsg(g, c);
        if (ret) {
                gk20a_err(dev_from_gk20a(g), "failed to disable channel/TSG\n");
@@ -789,11 +802,10 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
                 ch_ctx->zcull_ctx.ctx_sw_mode);
        if (ctxheader->gpu_va)
-                gk20a_mem_wr(g, ctxheader,
+                g->ops.gr.write_zcull_ptr(g, ctxheader,
-                        ctxsw_prog_main_image_zcull_ptr_o(), va);
+                                        ch_ctx->zcull_ctx.gpu_va);
        else
-                gk20a_mem_wr(g, mem,
+                g->ops.gr.write_zcull_ptr(g, mem, ch_ctx->zcull_ctx.gpu_va);
-                        ctxsw_prog_main_image_zcull_ptr_o(), va);
        gk20a_enable_channel_tsg(g, c);
@@ -1744,7 +1756,14 @@ restore_fe_go_idle:
        gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
                 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
-        gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_ptr_o(), 0);
+        if (gk20a_mem_begin(g, ctxheader))
+                goto clean_up;
+        if (ctxheader->gpu_va)
+                g->ops.gr.write_zcull_ptr(g, ctxheader, 0);
+        else
+                g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
+        gk20a_mem_end(g, ctxheader);
        g->ops.gr.commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
@@ -1857,7 +1876,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
        struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
        struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
        struct mem_desc *gr_mem;
-        u32 data, virt_addr;
+        u32 data;
+        u64 virt_addr;
        struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
        struct mem_desc *ctxheader = &ctx->mem;
        int ret;
@@ -1953,10 +1973,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
        if (enable_hwpm_ctxsw) {
                pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
-                /* pack upper 32 bits of virtual address into a 32 bit number
+                virt_addr = pm_ctx->mem.gpu_va;
-                 * (256 byte boundary)
-                 */
-                virt_addr = (u32)(pm_ctx->mem.gpu_va >> 8);
        } else {
                pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
                virt_addr = 0;
@@ -1965,12 +1982,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
        data |= pm_ctx->pm_mode;
        gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data);
        if (ctxheader->gpu_va)
-                gk20a_mem_wr(g, ctxheader,
+                g->ops.gr.write_pm_ptr(g, ctxheader, virt_addr);
-                        ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
        else
-                gk20a_mem_wr(g, gr_mem,
+                g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr);
-                        ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
        gk20a_mem_end(g, ctxheader);
        gk20a_mem_end(g, gr_mem);
@@ -1999,13 +2015,12 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
        struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
        u32 virt_addr_lo;
        u32 virt_addr_hi;
-        u32 virt_addr = 0;
+        u64 virt_addr = 0;
        u32 v, data;
        int ret = 0;
        struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
        struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
        struct mem_desc *ctxheader = &ctx->mem;
-        u32 va_lo, va_hi, va;
        gk20a_dbg_fn("");
@@ -2100,19 +2115,15 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
                        virt_addr_hi);
        }
-        va_lo = u64_lo32(ch_ctx->zcull_ctx.gpu_va);
-        va_hi = u64_hi32(ch_ctx->zcull_ctx.gpu_va);
-        va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);
        gk20a_mem_wr(g, mem, ctxsw_prog_main_image_zcull_o(),
                                ch_ctx->zcull_ctx.ctx_sw_mode);
        if (ctxheader->gpu_va)
-                gk20a_mem_wr(g, ctxheader,
+                g->ops.gr.write_zcull_ptr(g, ctxheader,
-                        ctxsw_prog_main_image_zcull_ptr_o(), va);
+                                        ch_ctx->zcull_ctx.gpu_va);
        else
-                gk20a_mem_wr(g, mem,
+                g->ops.gr.write_zcull_ptr(g, mem,
-                        ctxsw_prog_main_image_zcull_ptr_o(), va);
+                                        ch_ctx->zcull_ctx.gpu_va);
        /* Update main header region of the context buffer with the info needed
         * for PM context switching, including mode and possibly a pointer to
@@ -2126,10 +2137,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
                        return -EFAULT;
                }
-                /* pack upper 32 bits of virtual address into a 32 bit number
+                virt_addr = ch_ctx->pm_ctx.mem.gpu_va;
-                 * (256 byte boundary)
-                 */
-                virt_addr = (u32)(ch_ctx->pm_ctx.mem.gpu_va >> 8);
        } else
                virt_addr = 0;
@@ -2138,14 +2146,12 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
        data |= ch_ctx->pm_ctx.pm_mode;
        gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
-        gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
-        if (ctxheader->gpu_va) {
+        if (ctxheader->gpu_va)
-                gk20a_mem_wr(g, ctxheader,
+                g->ops.gr.write_pm_ptr(g, ctxheader, virt_addr);
-                        ctxsw_prog_main_image_pm_o(), data);
+        else
-                gk20a_mem_wr(g, ctxheader,
+                g->ops.gr.write_pm_ptr(g, mem, virt_addr);
-                        ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
-        }
        gk20a_mem_end(g, ctxheader);
 clean_up_mem:
@@ -9291,4 +9297,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
        gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping;
        gops->gr.commit_global_timeslice = gr_gk20a_commit_global_timeslice;
        gops->gr.commit_inst = gr_gk20a_commit_inst;
+        gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr;
+        gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index f43e57fd..19ab2ec0 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -711,6 +711,13 @@ void gr_gk20a_init_sm_id_table(struct gk20a *g);
 int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
+void gr_gk20a_write_zcull_ptr(struct gk20a *g,
+                                struct mem_desc *mem, u64 gpu_va);
+void gr_gk20a_write_pm_ptr(struct gk20a *g,
+                                struct mem_desc *mem, u64 gpu_va);
 static inline const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)
 {
        switch (graphics_preempt_mode) {
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 0df2845f..391fb8a2 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1603,4 +1603,7 @@ void gm20b_init_gr(struct gpu_ops *gops)
        gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping;
        gops->gr.commit_global_timeslice = gr_gk20a_commit_global_timeslice;
        gops->gr.commit_inst = gr_gk20a_commit_inst;
+        gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr;
+        gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr;
 }
author	seshendra Gadagottu <sgadagottu@nvidia.com>	2017-01-20 19:13:02 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-01-27 15:23:01 -0500
commit	88ce7a98c8321747154020c4f173cfe05a3b1149 (patch)
tree	afb1007b673f10de589e47670a8626dc7c44d694 /drivers/gpu
parent	fa3f8cc10186ab18952433f1838fb657f220adc5 (diff)

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index ff354bc8..f7ceaced 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -333,6 +333,10 @@ struct gpu_ops {
333	struct channel_gk20a *c, bool patch);	333	struct channel_gk20a *c, bool patch);
334	int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);	334	int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
335	void (*restore_context_header)(struct gk20a *g, struct mem_desc *ctxheader);	335	void (*restore_context_header)(struct gk20a *g, struct mem_desc *ctxheader);
		336	void (*write_zcull_ptr)(struct gk20a *g,
		337	struct mem_desc *mem, u64 gpu_va);
		338	void (*write_pm_ptr)(struct gk20a *g,
		339	struct mem_desc *mem, u64 gpu_va);
336	} gr;	340	} gr;
337	const char *name;	341	const char *name;
338	struct {	342	struct {


diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 48b825a1..c461a9e1 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -742,13 +742,30 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
742	return ret;	742	return ret;
743	}	743	}
744		744
		745	void gr_gk20a_write_zcull_ptr(struct gk20a *g,
		746	struct mem_desc *mem, u64 gpu_va)
		747	{
		748	u32 va = u64_lo32(gpu_va >> 8);
		749
		750	gk20a_mem_wr(g, mem,
		751	ctxsw_prog_main_image_zcull_ptr_o(), va);
		752	}
		753
		754	void gr_gk20a_write_pm_ptr(struct gk20a *g,
		755	struct mem_desc *mem, u64 gpu_va)
		756	{
		757	u32 va = u64_lo32(gpu_va >> 8);
		758
		759	gk20a_mem_wr(g, mem,
		760	ctxsw_prog_main_image_pm_ptr_o(), va);
		761	}
		762
745	static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)	763	static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
746	{	764	{
747	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;	765	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
748	struct mem_desc *mem = &ch_ctx->gr_ctx->mem;	766	struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
749	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;	767	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
750	struct mem_desc *ctxheader = &ctx->mem;	768	struct mem_desc *ctxheader = &ctx->mem;
751	u32 va_lo, va_hi, va;
752	int ret = 0;	769	int ret = 0;
753		770
754	gk20a_dbg_fn("");	771	gk20a_dbg_fn("");
@@ -768,10 +785,6 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
768	goto clean_up;	785	goto clean_up;
769	}	786	}
770		787
771	va_lo = u64_lo32(ch_ctx->zcull_ctx.gpu_va);
772	va_hi = u64_hi32(ch_ctx->zcull_ctx.gpu_va);
773	va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);
774
775	ret = gk20a_disable_channel_tsg(g, c);	788	ret = gk20a_disable_channel_tsg(g, c);
776	if (ret) {	789	if (ret) {
777	gk20a_err(dev_from_gk20a(g), "failed to disable channel/TSG\n");	790	gk20a_err(dev_from_gk20a(g), "failed to disable channel/TSG\n");
@@ -789,11 +802,10 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
789	ch_ctx->zcull_ctx.ctx_sw_mode);	802	ch_ctx->zcull_ctx.ctx_sw_mode);
790		803
791	if (ctxheader->gpu_va)	804	if (ctxheader->gpu_va)
792	gk20a_mem_wr(g, ctxheader,	805	g->ops.gr.write_zcull_ptr(g, ctxheader,
793	ctxsw_prog_main_image_zcull_ptr_o(), va);	806	ch_ctx->zcull_ctx.gpu_va);
794	else	807	else
795	gk20a_mem_wr(g, mem,	808	g->ops.gr.write_zcull_ptr(g, mem, ch_ctx->zcull_ctx.gpu_va);
796	ctxsw_prog_main_image_zcull_ptr_o(), va);
797		809
798	gk20a_enable_channel_tsg(g, c);	810	gk20a_enable_channel_tsg(g, c);
799		811
@@ -1744,7 +1756,14 @@ restore_fe_go_idle:
1744	gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),	1756	gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
1745	ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());	1757	ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
1746		1758
1747	gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_ptr_o(), 0);	1759	if (gk20a_mem_begin(g, ctxheader))
		1760	goto clean_up;
		1761
		1762	if (ctxheader->gpu_va)
		1763	g->ops.gr.write_zcull_ptr(g, ctxheader, 0);
		1764	else
		1765	g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
		1766	gk20a_mem_end(g, ctxheader);
1748		1767
1749	g->ops.gr.commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);	1768	g->ops.gr.commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
1750		1769
@@ -1857,7 +1876,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1857	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;	1876	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1858	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;	1877	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
1859	struct mem_desc *gr_mem;	1878	struct mem_desc *gr_mem;
1860	u32 data, virt_addr;	1879	u32 data;
		1880	u64 virt_addr;
1861	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;	1881	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
1862	struct mem_desc *ctxheader = &ctx->mem;	1882	struct mem_desc *ctxheader = &ctx->mem;
1863	int ret;	1883	int ret;
@@ -1953,10 +1973,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1953	if (enable_hwpm_ctxsw) {	1973	if (enable_hwpm_ctxsw) {
1954	pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();	1974	pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
1955		1975
1956	/* pack upper 32 bits of virtual address into a 32 bit number	1976	virt_addr = pm_ctx->mem.gpu_va;
1957	* (256 byte boundary)
1958	*/
1959	virt_addr = (u32)(pm_ctx->mem.gpu_va >> 8);
1960	} else {	1977	} else {
1961	pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();	1978	pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
1962	virt_addr = 0;	1979	virt_addr = 0;
@@ -1965,12 +1982,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1965	data |= pm_ctx->pm_mode;	1982	data |= pm_ctx->pm_mode;
1966		1983
1967	gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data);	1984	gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data);
		1985
1968	if (ctxheader->gpu_va)	1986	if (ctxheader->gpu_va)
1969	gk20a_mem_wr(g, ctxheader,	1987	g->ops.gr.write_pm_ptr(g, ctxheader, virt_addr);
1970	ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
1971	else	1988	else
1972	gk20a_mem_wr(g, gr_mem,	1989	g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr);
1973	ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
1974		1990
1975	gk20a_mem_end(g, ctxheader);	1991	gk20a_mem_end(g, ctxheader);
1976	gk20a_mem_end(g, gr_mem);	1992	gk20a_mem_end(g, gr_mem);
@@ -1999,13 +2015,12 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1999	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;	2015	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
2000	u32 virt_addr_lo;	2016	u32 virt_addr_lo;
2001	u32 virt_addr_hi;	2017	u32 virt_addr_hi;
2002	u32 virt_addr = 0;	2018	u64 virt_addr = 0;
2003	u32 v, data;	2019	u32 v, data;
2004	int ret = 0;	2020	int ret = 0;
2005	struct mem_desc *mem = &ch_ctx->gr_ctx->mem;	2021	struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
2006	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;	2022	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
2007	struct mem_desc *ctxheader = &ctx->mem;	2023	struct mem_desc *ctxheader = &ctx->mem;
2008	u32 va_lo, va_hi, va;
2009		2024
2010	gk20a_dbg_fn("");	2025	gk20a_dbg_fn("");
2011		2026
@@ -2100,19 +2115,15 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
2100	virt_addr_hi);	2115	virt_addr_hi);
2101	}	2116	}
2102		2117
2103	va_lo = u64_lo32(ch_ctx->zcull_ctx.gpu_va);
2104	va_hi = u64_hi32(ch_ctx->zcull_ctx.gpu_va);
2105	va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);
2106
2107	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_zcull_o(),	2118	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_zcull_o(),
2108	ch_ctx->zcull_ctx.ctx_sw_mode);	2119	ch_ctx->zcull_ctx.ctx_sw_mode);
2109		2120
2110	if (ctxheader->gpu_va)	2121	if (ctxheader->gpu_va)
2111	gk20a_mem_wr(g, ctxheader,	2122	g->ops.gr.write_zcull_ptr(g, ctxheader,
2112	ctxsw_prog_main_image_zcull_ptr_o(), va);	2123	ch_ctx->zcull_ctx.gpu_va);
2113	else	2124	else
2114	gk20a_mem_wr(g, mem,	2125	g->ops.gr.write_zcull_ptr(g, mem,
2115	ctxsw_prog_main_image_zcull_ptr_o(), va);	2126	ch_ctx->zcull_ctx.gpu_va);
2116		2127
2117	/* Update main header region of the context buffer with the info needed	2128	/* Update main header region of the context buffer with the info needed
2118	* for PM context switching, including mode and possibly a pointer to	2129	* for PM context switching, including mode and possibly a pointer to
@@ -2126,10 +2137,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
2126	return -EFAULT;	2137	return -EFAULT;
2127	}	2138	}
2128		2139
2129	/* pack upper 32 bits of virtual address into a 32 bit number	2140	virt_addr = ch_ctx->pm_ctx.mem.gpu_va;
2130	* (256 byte boundary)
2131	*/
2132	virt_addr = (u32)(ch_ctx->pm_ctx.mem.gpu_va >> 8);
2133	} else	2141	} else
2134	virt_addr = 0;	2142	virt_addr = 0;
2135		2143
@@ -2138,14 +2146,12 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
2138	data |= ch_ctx->pm_ctx.pm_mode;	2146	data |= ch_ctx->pm_ctx.pm_mode;
2139		2147
2140	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);	2148	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
2141	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
2142		2149
2143	if (ctxheader->gpu_va) {	2150	if (ctxheader->gpu_va)
2144	gk20a_mem_wr(g, ctxheader,	2151	g->ops.gr.write_pm_ptr(g, ctxheader, virt_addr);
2145	ctxsw_prog_main_image_pm_o(), data);	2152	else
2146	gk20a_mem_wr(g, ctxheader,	2153	g->ops.gr.write_pm_ptr(g, mem, virt_addr);
2147	ctxsw_prog_main_image_pm_ptr_o(), virt_addr);	2154
2148	}
2149		2155
2150	gk20a_mem_end(g, ctxheader);	2156	gk20a_mem_end(g, ctxheader);
2151	clean_up_mem:	2157	clean_up_mem:
@@ -9291,4 +9297,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
9291	gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping;	9297	gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping;
9292	gops->gr.commit_global_timeslice = gr_gk20a_commit_global_timeslice;	9298	gops->gr.commit_global_timeslice = gr_gk20a_commit_global_timeslice;
9293	gops->gr.commit_inst = gr_gk20a_commit_inst;	9299	gops->gr.commit_inst = gr_gk20a_commit_inst;
		9300	gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr;
		9301	gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr;
9294	}	9302	}


diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index f43e57fd..19ab2ec0 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -711,6 +711,13 @@ void gr_gk20a_init_sm_id_table(struct gk20a *g);
711		711
712	int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);	712	int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
713		713
		714	void gr_gk20a_write_zcull_ptr(struct gk20a *g,
		715	struct mem_desc *mem, u64 gpu_va);
		716
		717	void gr_gk20a_write_pm_ptr(struct gk20a *g,
		718	struct mem_desc *mem, u64 gpu_va);
		719
		720
714	static inline const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)	721	static inline const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)
715	{	722	{
716	switch (graphics_preempt_mode) {	723	switch (graphics_preempt_mode) {


diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 0df2845f..391fb8a2 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1603,4 +1603,7 @@ void gm20b_init_gr(struct gpu_ops *gops)
1603	gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping;	1603	gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping;
1604	gops->gr.commit_global_timeslice = gr_gk20a_commit_global_timeslice;	1604	gops->gr.commit_global_timeslice = gr_gk20a_commit_global_timeslice;
1605	gops->gr.commit_inst = gr_gk20a_commit_inst;	1605	gops->gr.commit_inst = gr_gk20a_commit_inst;
		1606	gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr;
		1607	gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr;
		1608
1606	}	1609	}