From c9419732776a3f31b3c1ace0cd113151f3a4d7cd Mon Sep 17 00:00:00 2001
From: Peter Daifuku
Date: Wed, 8 Nov 2017 19:13:29 -0800
Subject: gpu: nvgpu: ctx_patch_write fixes

- Add an update_patch_count parameter to the ctx_patch_write_begin/end
  functions. If true, the main_image_patch_count register will be
  updated. Previously, the patch count would be updated only if the
  cpu_va for the graphics context was non-NULL, but that check works
  only for sysmem (cpu_va is always 0 for vidmem).

- Remove the unused patch parameter from the commit_global_timeslice
  functions.

JIRA ESRM-74
Bug 2012077

Change-Id: I35d0a9eb48669a227833bba1d2e63e9fe8fd8aa9
Signed-off-by: Peter Daifuku
Reviewed-on: https://git-master.nvidia.com/r/1594790
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/gk20a.h    |  2 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 60 ++++++++++++++++----------------------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h |  9 +++---
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c |  4 +--
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c |  8 ++---
 5 files changed, 37 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 2dbe8b02..4bc5c04b 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -401,7 +401,7 @@ struct gpu_ops {
 		void (*program_zcull_mapping)(struct gk20a *g, u32 zcull_alloc_num,
 				u32 *zcull_map_tiles);
 		int (*commit_global_timeslice)(struct gk20a *g,
-				struct channel_gk20a *c, bool patch);
+				struct channel_gk20a *c);
 		int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
 		void (*write_zcull_ptr)(struct gk20a *g,
 				struct nvgpu_mem *mem, u64 gpu_va);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index ddce0716..5c13fec2 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -673,7 +673,8 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
  */
 int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
-					struct channel_ctx_gk20a *ch_ctx)
+					struct channel_ctx_gk20a *ch_ctx,
+					bool update_patch_count)
 {
 	int err = 0;
 
@@ -681,7 +682,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 	if (err)
 		return err;
 
-	if (ch_ctx->gr_ctx->mem.cpu_va) {
+	if (update_patch_count) {
 		/* reset patch count if ucode has already processed it */
 		ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
 					&ch_ctx->gr_ctx->mem,
@@ -693,12 +694,13 @@
 }
 
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
-					struct channel_ctx_gk20a *ch_ctx)
+					struct channel_ctx_gk20a *ch_ctx,
+					bool update_patch_count)
 {
 	nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem);
 
 	/* Write context count to context image if it is mapped */
-	if (ch_ctx->gr_ctx->mem.cpu_va) {
+	if (update_patch_count) {
 		nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem,
 			ctxsw_prog_main_image_patch_count_o(),
 			ch_ctx->patch_ctx.data_count);
@@ -876,7 +878,7 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 	gk20a_dbg_fn("");
 	if (patch) {
 		int err;
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 		if (err)
 			return err;
 	}
@@ -922,13 +924,12 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 	g->ops.gr.commit_global_cb_manager(g, c, patch);
 
 	if (patch)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 
 	return 0;
 }
 
-int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c,
-					bool patch)
+int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
 {
 	struct gr_gk20a *gr = &g->gr;
 	struct channel_ctx_gk20a *ch_ctx = NULL;
@@ -946,14 +947,6 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c,
 	ds_debug = gk20a_readl(g, gr_ds_debug_r());
 	mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());
 
-	if (patch) {
-		int err;
-		ch_ctx = &c->ch_ctx;
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
-		if (err)
-			return err;
-	}
-
 	if (gr->timeslice_mode == gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v()) {
 		pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
 		pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());
@@ -965,27 +958,24 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c,
 		ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
 		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;
 
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
 	} else {
 		gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
 		pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
 		ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
 		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;
 
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false);
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
 	}
 
-	if (patch)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
-
 	return 0;
 }
@@ -1489,7 +1479,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 		goto clean_up;
 
 	/* override a few ctx state registers */
-	g->ops.gr.commit_global_timeslice(g, c, false);
+	g->ops.gr.commit_global_timeslice(g, c);
 
 	/* floorsweep anything left */
 	err = g->ops.gr.init_fs_state(g);
@@ -3031,7 +3021,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
 		lockboost = (lockboost & ~lockboost_mask) |
 			gr_gpcs_tpcs_sm_sch_macro_sched_lockboost_size_f(0);
 
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 
 		if (!err) {
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
@@ -3040,7 +3030,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpcs_tpcs_sm_sch_macro_sched_r(),
 				lockboost, true);
-			gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+			gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 		} else {
 			nvgpu_err(g,
 				"failed to set texlock for compute class");
@@ -4528,7 +4518,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 		 gr_fe_go_idle_timeout_count_disabled_f());
 
 	/* override a few ctx state registers */
-	g->ops.gr.commit_global_timeslice(g, NULL, false);
+	g->ops.gr.commit_global_timeslice(g, NULL);
 
 	/* floorsweep anything left */
 	err = g->ops.gr.init_fs_state(g);
@@ -7851,7 +7841,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	}
 
 	offset_addrs = offsets + max_offsets;
 
-	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 	if (err)
 		goto cleanup;
@@ -7989,7 +7979,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	nvgpu_kfree(g, offsets);
 
 	if (ch_ctx->patch_ctx.mem.cpu_va)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, gr_ctx_ready);
 	if (gr_ctx_ready)
 		nvgpu_mem_end(g, &ch_ctx->gr_ctx->mem);
 	if (pm_ctx_ready)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 2b3b74bc..b437838f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -619,9 +619,11 @@ struct channel_ctx_gk20a;
 void gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx,
 		u32 addr, u32 data, bool patch);
 int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx);
+		struct channel_ctx_gk20a *ch_ctx,
+		bool update_patch_count);
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx);
+		struct channel_ctx_gk20a *ch_ctx,
+		bool update_patch_count);
 void gr_gk20a_commit_global_pagepool(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx,
 		u64 addr, u32 size, bool patch);
@@ -745,8 +747,7 @@ int gr_gk20a_resume_from_pause(struct gk20a *g);
 int gr_gk20a_clear_sm_errors(struct gk20a *g);
 u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);
 
-int gr_gk20a_commit_global_timeslice(struct gk20a *g,
-		struct channel_gk20a *c, bool patch);
+int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c);
 
 void gr_gk20a_init_sm_id_table(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 4c49f734..c692d975 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1339,7 +1339,7 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,
 		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
 				gr->sm_error_states[sm_id].hww_warp_esr_report_mask);
 	} else {
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 		if (err)
 			goto enable_ctxsw;
@@ -1352,7 +1352,7 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,
 				gr->sm_error_states[sm_id].hww_warp_esr_report_mask,
 				true);
 
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 	}
 
 enable_ctxsw:
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 24b22a7d..c9b2f859 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1224,7 +1224,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 			gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va);
 	}
 
-	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
 	if (err) {
 		nvgpu_err(g, "can't map patch context");
 		goto out;
@@ -1277,7 +1277,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 				cbes_reserve),
 				true);
 
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
 	}
 
 out:
@@ -2254,13 +2254,13 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 		g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem);
 
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
 		if (err) {
 			nvgpu_err(g, "can't map patch context");
 			goto enable_ch;
 		}
 		g->ops.gr.commit_global_cb_manager(g, ch, true);
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
 	}
 
 enable_ch:
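Editor's note (not part of the patch): the core of this fix is replacing an
inferred predicate (gr_ctx->mem.cpu_va != 0) with an explicit caller-intent
flag. The standalone C sketch below models only that predicate change;
struct mock_gr_ctx and both helper functions are hypothetical stand-ins,
not nvgpu code.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the graphics context image; in nvgpu,
     * cpu_va is 0 whenever the image is vidmem-backed. */
    struct mock_gr_ctx {
            uintptr_t cpu_va;
    };

    /* Old heuristic: write back the patch count only when the image has
     * a CPU mapping -- always false for vidmem, so the count update was
     * silently skipped there. */
    static bool old_should_update_count(const struct mock_gr_ctx *ctx)
    {
            return ctx->cpu_va != 0;
    }

    /* New contract: the caller states explicitly whether the
     * main_image_patch_count register must be updated. */
    static bool new_should_update_count(bool update_patch_count)
    {
            return update_patch_count;
    }

    int main(void)
    {
            struct mock_gr_ctx sysmem = { .cpu_va = 0x1000 };
            struct mock_gr_ctx vidmem = { .cpu_va = 0 };

            /* A caller that needs the count updated passes true
             * regardless of where the context image lives. */
            printf("sysmem: old=%d new=%d\n",
                   (int)old_should_update_count(&sysmem),
                   (int)new_should_update_count(true));
            printf("vidmem: old=%d new=%d\n",
                   (int)old_should_update_count(&vidmem),
                   (int)new_should_update_count(true));
            return 0;
    }

Expected output is "sysmem: old=1 new=1" and "vidmem: old=0 new=1": the old
check drops the update for vidmem-backed contexts, the explicit flag does
not. This mirrors the call sites above: the gp10b preemption paths now pass
true, while __gr_gk20a_exec_ctx_ops passes gr_ctx_ready to preserve its
mapping-dependent behavior.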