From 2f6698b863c9cc1db6455637b7c72e812b470b93 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Fri, 15 Dec 2017 09:04:15 -0800
Subject: gpu: nvgpu: Make graphics context property of TSG

Move graphics context ownership to TSG instead of channel. Combine
channel_ctx_gk20a and gr_ctx_desc into one structure, because the split
between them was arbitrary. Make the context header a property of the
channel.

Bug 1842197

Change-Id: I410e3262f80b318d8528bcbec270b63a2d8d2ff9
Signed-off-by: Terje Bergstrom
Reviewed-on: https://git-master.nvidia.com/r/1639532
Reviewed-by: Seshendra Gadagottu
Tested-by: Seshendra Gadagottu
Reviewed-by: svc-mobile-coverity
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c     | 66 +++++++++++++++++++++-------------
 drivers/gpu/nvgpu/gv11b/gr_gv11b.h     |  9 ++---
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c    |  1 -
 drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 42 ++++++++++++++++++----
 4 files changed, 83 insertions(+), 35 deletions(-)

(limited to 'drivers/gpu/nvgpu/gv11b')

diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index d5924169..3030def8 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1373,7 +1373,7 @@ fail_free:
 }
 
 int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
-				struct gr_ctx_desc *gr_ctx,
+				struct nvgpu_gr_ctx *gr_ctx,
 				struct vm_gk20a *vm, u32 class,
 				u32 graphics_preempt_mode,
 				u32 compute_preempt_mode)
@@ -1497,13 +1497,13 @@ fail:
 }
 
 void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
+		struct channel_gk20a *c,
 		struct nvgpu_mem *mem)
 {
-	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
-	struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct nvgpu_mem *ctxheader = &ctx->mem;
-
 	u32 gfxp_preempt_option =
 		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
 	u32 cilp_preempt_option =
@@ -1514,6 +1514,12 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return;
+
+	gr_ctx = &tsg->gr_ctx;
+
 	if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
 		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
@@ -1552,7 +1558,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
 				gr_ctx->preempt_ctxsw_buffer.gpu_va);
 		}
 
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
+		err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true);
 		if (err) {
 			nvgpu_err(g, "can't map patch context");
 			goto out;
@@ -1564,7 +1570,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
 			(32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
 
 		gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
-		g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
+		g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, true);
 
 		addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >>
 			gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
@@ -1575,7 +1581,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
 		if (size == g->ops.gr.pagepool_default_size(g))
 			size = gr_scc_pagepool_total_pages_hwmax_v();
 
-		g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
+		g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, true);
 
 		addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >>
 			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
@@ -1584,28 +1590,28 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
 		size = gr_ctx->spill_ctxsw_buffer.size /
 			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
-		gr_gk20a_ctx_patch_write(g, ch_ctx,
+		gr_gk20a_ctx_patch_write(g, gr_ctx,
 			gr_gpc0_swdx_rm_spill_buffer_addr_r(),
 			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
 			true);
-		gr_gk20a_ctx_patch_write(g, ch_ctx,
+		gr_gk20a_ctx_patch_write(g, gr_ctx,
 			gr_gpc0_swdx_rm_spill_buffer_size_r(),
 			gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
 			true);
 
 		cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
-		gr_gk20a_ctx_patch_write(g, ch_ctx,
+		gr_gk20a_ctx_patch_write(g, gr_ctx,
 			gr_gpcs_swdx_beta_cb_ctrl_r(),
 			gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
 				cbes_reserve),
 			true);
-		gr_gk20a_ctx_patch_write(g, ch_ctx,
+		gr_gk20a_ctx_patch_write(g, gr_ctx,
 			gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
 			gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
 				cbes_reserve),
 			true);
 
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
+		gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
 	}
 
 out:
@@ -1902,10 +1908,9 @@ int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms,
 }
 
 void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
-					struct channel_ctx_gk20a *ch_ctx,
+					struct nvgpu_gr_ctx *gr_ctx,
 					u64 addr, bool patch)
 {
-	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
 	int attrBufferSize;
 
 	if (gr_ctx->preempt_ctxsw_buffer.gpu_va)
@@ -1915,16 +1920,16 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
 
 	attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
 
-	gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch);
+	gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
 		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
 		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
 		gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
 		gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) |
 		gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
 }
@@ -2042,6 +2047,7 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
 	u32 offset = gk20a_gr_gpc_offset(g, gpc) +
 			gk20a_gr_tpc_offset(g, tpc) +
 			gv11b_gr_sm_offset(g, sm);
+	struct tsg_gk20a *tsg;
 
 	*early_exit = false;
 	*ignore_debugger = false;
@@ -2054,9 +2060,14 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
 		return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm,
 				warp_esr, fault_ch);
 
-	if (fault_ch)
-		cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode ==
+	if (fault_ch) {
+		tsg = tsg_gk20a_from_ch(fault_ch);
+		if (!tsg)
+			return -EINVAL;
+
+		cilp_enabled = (tsg->gr_ctx.compute_preempt_mode ==
 			NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
+	}
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
 			"SM Exception received on gpc %d tpc %d sm %d = 0x%08x",
@@ -2509,7 +2520,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 	if (err)
 		return err;
 
-	ctx = &c->ch_ctx.ctx_header;
+	ctx = &c->ctx_header;
 	addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v();
 	addr_hi = u64_hi32(ctx->mem.gpu_va);
 
@@ -2529,7 +2540,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 
 int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
 {
-	struct channel_ctx_gk20a *ch_ctx = NULL;
+	struct nvgpu_gr_ctx *ch_ctx = NULL;
 	u32 pd_ab_dist_cfg0;
 	u32 ds_debug;
 	u32 mpc_vtg_debug;
@@ -2836,11 +2847,18 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g,
 		struct channel_gk20a *ch, u32 sm_id,
 		struct nvgpu_gr_sm_error_state *sm_error_state)
 {
+	struct tsg_gk20a *tsg;
 	u32 gpc, tpc, sm, offset;
 	struct gr_gk20a *gr = &g->gr;
-	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+	struct nvgpu_gr_ctx *ch_ctx;
 	int err = 0;
 
+	tsg = tsg_gk20a_from_ch(ch);
+	if (!tsg)
+		return -EINVAL;
+
+	ch_ctx = &tsg->gr_ctx;
+
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
 	gr->sm_error_states[sm_id].hww_global_esr =
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index b69e69bd..022a7698 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -41,9 +41,10 @@ struct zbc_s_table {
 };
 
 struct gk20a;
+struct gr_gk20a;
 struct zbc_entry;
 struct zbc_query_params;
-struct channel_ctx_gk20a;
+struct nvgpu_gr_ctx;
 struct nvgpu_warpstate;
 struct nvgpu_gr_sm_error_state;
 struct gr_ctx_desc;
@@ -128,7 +129,7 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
 int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms,
 		u32 expect_delay);
 void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
-			struct channel_ctx_gk20a *ch_ctx,
+			struct nvgpu_gr_ctx *ch_ctx,
 			u64 addr, bool patch);
 void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
 void gr_gv11b_get_access_map(struct gk20a *g,
@@ -222,13 +223,13 @@ unsigned long gr_gv11b_get_max_gfxp_wfi_timeout_count(struct gk20a *g);
 void gr_gv11b_ecc_init_scrub_reg(struct gk20a *g);
 
 int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
-		struct gr_ctx_desc *gr_ctx,
+		struct nvgpu_gr_ctx *gr_ctx,
 		struct vm_gk20a *vm, u32 class,
 		u32 graphics_preempt_mode,
 		u32 compute_preempt_mode);
 
 void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
+		struct channel_gk20a *ch_ctx,
 		struct nvgpu_mem *mem);
 
 #endif
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index aa3d52af..0a552f5b 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -272,7 +272,6 @@ static const struct gpu_ops gv11b_ops = {
 		.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
 		.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
 		.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
-		.free_channel_ctx = gk20a_free_channel_ctx,
 		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
 		.get_zcull_info = gr_gk20a_get_zcull_info,
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
index fe1aa8a5..607fff91 100644
--- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -43,7 +43,7 @@ static void gv11b_subctx_commit_pdb(struct channel_gk20a *c,
 
 void gv11b_free_subctx_header(struct channel_gk20a *c)
 {
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct gk20a *g = c->g;
 
 	nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header");
@@ -57,13 +57,13 @@ void gv11b_free_subctx_header(struct channel_gk20a *c)
 
 int gv11b_alloc_subctx_header(struct channel_gk20a *c)
 {
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct gk20a *g = c->g;
 	int ret = 0;
 
 	nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header");
 
-	if (ctx->mem.gpu_va == 0) {
+	if (!nvgpu_mem_is_valid(&ctx->mem)) {
 		ret = nvgpu_dma_alloc_flags_sys(g,
				0, /* No Special flags */
				ctxsw_prog_fecs_header_v(),
@@ -111,20 +111,50 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
 
 int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
 {
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct nvgpu_mem *gr_mem;
 	struct gk20a *g = c->g;
 	int ret = 0;
 	u32 addr_lo, addr_hi;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx;
 
-	addr_lo = u64_lo32(gpu_va);
-	addr_hi = u64_hi32(gpu_va);
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
 
 	gr_mem = &ctx->mem;
 	g->ops.mm.l2_flush(g, true);
 	if (nvgpu_mem_begin(g, gr_mem))
 		return -ENOMEM;
 
+	/* set priv access map */
+	addr_lo = u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
+	addr_hi = u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
+	nvgpu_mem_wr(g, gr_mem,
+		ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
+		addr_lo);
+	nvgpu_mem_wr(g, gr_mem,
+		ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
+		addr_hi);
+
+	addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
+	addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
+	nvgpu_mem_wr(g, gr_mem,
+		ctxsw_prog_main_image_patch_adr_lo_o(),
+		addr_lo);
+	nvgpu_mem_wr(g, gr_mem,
+		ctxsw_prog_main_image_patch_adr_hi_o(),
+		addr_hi);
+
+	g->ops.gr.write_pm_ptr(g, gr_mem, gr_ctx->pm_ctx.mem.gpu_va);
+	g->ops.gr.write_zcull_ptr(g, gr_mem, gr_ctx->zcull_ctx.gpu_va);
+
+	addr_lo = u64_lo32(gpu_va);
+	addr_hi = u64_hi32(gpu_va);
+
 	nvgpu_mem_wr(g, gr_mem,
 		ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi);
 	nvgpu_mem_wr(g, gr_mem,
--
cgit v1.2.2
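
Editor's note on the pattern this patch establishes: the TSG now owns the graphics context, while a channel keeps only its subcontext header, so every hunk above replaces a direct ch->ch_ctx.gr_ctx access with a TSG lookup via tsg_gk20a_from_ch() that can fail for an unbound channel. Below is a minimal standalone C model of that ownership change; the *_model structs and the get_gr_ctx() helper are illustrative stand-ins written for this note, not nvgpu code.

#include <stdint.h>
#include <stdio.h>

/* Reduced stand-ins for the driver structures (illustration only). */
struct nvgpu_gr_ctx_model {
	uint32_t graphics_preempt_mode;
	uint32_t compute_preempt_mode;
};

struct tsg_model {
	struct nvgpu_gr_ctx_model gr_ctx;  /* after the patch: owned by the TSG */
};

struct channel_model {
	struct tsg_model *tsg;  /* NULL when the channel is not bound to a TSG */
	/* the subcontext header (ctx_header) stays per-channel */
};

/*
 * Mirrors the lookup the patch inserts wherever a channel used to carry
 * its own gr_ctx: resolve the owning TSG first; an unbound channel is an
 * error, which is why the patched driver paths return -EINVAL or bail out.
 */
static struct nvgpu_gr_ctx_model *get_gr_ctx(struct channel_model *ch)
{
	return ch->tsg ? &ch->tsg->gr_ctx : NULL;
}

int main(void)
{
	struct tsg_model tsg = { .gr_ctx = { .compute_preempt_mode = 2 } };
	struct channel_model bound = { .tsg = &tsg };
	struct channel_model unbound = { .tsg = NULL };

	/* Old: fault_ch->ch_ctx.gr_ctx->compute_preempt_mode; new: via the TSG. */
	printf("bound compute_preempt_mode: %u\n",
	       (unsigned)get_gr_ctx(&bound)->compute_preempt_mode);
	printf("unbound lookup: %s\n",
	       get_gr_ctx(&unbound) ? "ok" : "fails (-EINVAL path)");
	return 0;
}

This is the same sequence visible in gr_gv11b_pre_process_sm_exception(), gv11b_gr_update_sm_error_state() and gv11b_update_subctx_header() above, each of which now resolves the TSG before touching any context state.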