4 files changed, 83 insertions, 35 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index d5924169..3030def8 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1373,7 +1373,7 @@ fail_free:
 }
 int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
-                                struct gr_ctx_desc *gr_ctx,
+                                struct nvgpu_gr_ctx *gr_ctx,
                                struct vm_gk20a *vm, u32 class,
                                u32 graphics_preempt_mode,
                                u32 compute_preempt_mode)
@@ -1497,13 +1497,13 @@ fail:
 }
 void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
-                struct channel_ctx_gk20a *ch_ctx,
+                struct channel_gk20a *c,
                struct nvgpu_mem *mem)
 {
-        struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+        struct tsg_gk20a *tsg;
-        struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
+        struct nvgpu_gr_ctx *gr_ctx;
+        struct ctx_header_desc *ctx = &c->ctx_header;
        struct nvgpu_mem *ctxheader = &ctx->mem;
        u32 gfxp_preempt_option =
                ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
        u32 cilp_preempt_option =
@@ -1514,6 +1514,12 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
        gk20a_dbg_fn("");
+        tsg = tsg_gk20a_from_ch(c);
+        if (!tsg)
+                return;
+        gr_ctx = &tsg->gr_ctx;
        if (gr_ctx->graphics_preempt_mode ==
                                        NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
                gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
@@ -1552,7 +1558,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
                                gr_ctx->preempt_ctxsw_buffer.gpu_va);
                }
-                err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
+                err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true);
                if (err) {
                        nvgpu_err(g, "can't map patch context");
                        goto out;
@@ -1564,7 +1570,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
                         (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
                gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
-                g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
+                g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, true);
                addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >>
                        gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
@@ -1575,7 +1581,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
                if (size == g->ops.gr.pagepool_default_size(g))
                        size = gr_scc_pagepool_total_pages_hwmax_v();
-                g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
+                g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, true);
                addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >>
                        gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
@@ -1584,28 +1590,28 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
                size = gr_ctx->spill_ctxsw_buffer.size /
                        gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
-                gr_gk20a_ctx_patch_write(g, ch_ctx,
+                gr_gk20a_ctx_patch_write(g, gr_ctx,
                                gr_gpc0_swdx_rm_spill_buffer_addr_r(),
                                gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
                                true);
-                gr_gk20a_ctx_patch_write(g, ch_ctx,
+                gr_gk20a_ctx_patch_write(g, gr_ctx,
                                gr_gpc0_swdx_rm_spill_buffer_size_r(),
                                gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
                                true);
                cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
-                gr_gk20a_ctx_patch_write(g, ch_ctx,
+                gr_gk20a_ctx_patch_write(g, gr_ctx,
                                gr_gpcs_swdx_beta_cb_ctrl_r(),
                                gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
                                        cbes_reserve),
                                true);
-                gr_gk20a_ctx_patch_write(g, ch_ctx,
+                gr_gk20a_ctx_patch_write(g, gr_ctx,
                                gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
                                gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
                                        cbes_reserve),
                                true);
-                gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
+                gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
        }
 out:
@@ -1902,10 +1908,9 @@ int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms,
 }
 void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
-                                             struct channel_ctx_gk20a *ch_ctx,
+                                             struct nvgpu_gr_ctx *gr_ctx,
                                             u64 addr, bool patch)
 {
-        struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
        int attrBufferSize;
        if (gr_ctx->preempt_ctxsw_buffer.gpu_va)
@@ -1915,16 +1920,16 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
        attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
-        gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch);
+        gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch);
-        gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
+        gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
                gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
                gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
-        gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
+        gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
                gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch);
-        gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
+        gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
                gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) |
                gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
 }
@@ -2042,6 +2047,7 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
        u32 offset = gk20a_gr_gpc_offset(g, gpc) +
                        gk20a_gr_tpc_offset(g, tpc) +
                        gv11b_gr_sm_offset(g, sm);
+        struct tsg_gk20a *tsg;
        *early_exit = false;
        *ignore_debugger = false;
@@ -2054,9 +2060,14 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
                return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm,
                                warp_esr, fault_ch);
-        if (fault_ch)
+        if (fault_ch) {
-                cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode ==
+                tsg = tsg_gk20a_from_ch(fault_ch);
+                if (!tsg)
+                        return -EINVAL;
+                cilp_enabled = (tsg->gr_ctx.compute_preempt_mode ==
                        NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
+        }
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
                        "SM Exception received on gpc %d tpc %d sm %d = 0x%08x",
@@ -2509,7 +2520,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
        if (err)
                return err;
-        ctx = &c->ch_ctx.ctx_header;
+        ctx = &c->ctx_header;
        addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v();
        addr_hi = u64_hi32(ctx->mem.gpu_va);
@@ -2529,7 +2540,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
 {
-        struct channel_ctx_gk20a *ch_ctx = NULL;
+        struct nvgpu_gr_ctx *ch_ctx = NULL;
        u32 pd_ab_dist_cfg0;
        u32 ds_debug;
        u32 mpc_vtg_debug;
@@ -2836,11 +2847,18 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g,
                struct channel_gk20a *ch, u32 sm_id,
                struct nvgpu_gr_sm_error_state *sm_error_state)
 {
+        struct tsg_gk20a *tsg;
        u32 gpc, tpc, sm, offset;
        struct gr_gk20a *gr = &g->gr;
-        struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+        struct nvgpu_gr_ctx *ch_ctx;
        int err = 0;
+        tsg = tsg_gk20a_from_ch(ch);
+        if (!tsg)
+                return -EINVAL;
+        ch_ctx = &tsg->gr_ctx;
        nvgpu_mutex_acquire(&g->dbg_sessions_lock);
        gr->sm_error_states[sm_id].hww_global_esr =
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index b69e69bd..022a7698 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -41,9 +41,10 @@ struct zbc_s_table {
 };
 struct gk20a;
+struct gr_gk20a;
 struct zbc_entry;
 struct zbc_query_params;
-struct channel_ctx_gk20a;
+struct nvgpu_gr_ctx;
 struct nvgpu_warpstate;
 struct nvgpu_gr_sm_error_state;
 struct gr_ctx_desc;
@@ -128,7 +129,7 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
 int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms,
                       u32 expect_delay);
 void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
-                                             struct channel_ctx_gk20a *ch_ctx,
+                                             struct nvgpu_gr_ctx *gr_ctx,
                                              u64 addr, bool patch);
 void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
 void gr_gv11b_get_access_map(struct gk20a *g,
@@ -222,13 +223,13 @@ unsigned long gr_gv11b_get_max_gfxp_wfi_timeout_count(struct gk20a *g);
 void gr_gv11b_ecc_init_scrub_reg(struct gk20a *g);
 int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
-                                struct gr_ctx_desc *gr_ctx,
+                                struct nvgpu_gr_ctx *gr_ctx,
                                struct vm_gk20a *vm, u32 class,
                                u32 graphics_preempt_mode,
                                u32 compute_preempt_mode);
 void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
-                struct channel_ctx_gk20a *ch_ctx,
+                struct channel_gk20a *c,
                 struct nvgpu_mem *mem);
 #endif
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index aa3d52af..0a552f5b 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -272,7 +272,6 @@ static const struct gpu_ops gv11b_ops = {
                .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
                .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
                .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
-                .free_channel_ctx = gk20a_free_channel_ctx,
                .alloc_obj_ctx = gk20a_alloc_obj_ctx,
                .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
                .get_zcull_info = gr_gk20a_get_zcull_info,
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
index fe1aa8a5..607fff91 100644
--- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -43,7 +43,7 @@ static void gv11b_subctx_commit_pdb(struct channel_gk20a *c,
 void gv11b_free_subctx_header(struct channel_gk20a *c)
 {
-        struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+        struct ctx_header_desc *ctx = &c->ctx_header;
        struct gk20a *g = c->g;
        nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header");
@@ -57,13 +57,13 @@ void gv11b_free_subctx_header(struct channel_gk20a *c)
 int gv11b_alloc_subctx_header(struct channel_gk20a *c)
 {
-        struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+        struct ctx_header_desc *ctx = &c->ctx_header;
        struct gk20a *g = c->g;
        int ret = 0;
        nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header");
-        if (ctx->mem.gpu_va == 0) {
+        if (!nvgpu_mem_is_valid(&ctx->mem)) {
                ret = nvgpu_dma_alloc_flags_sys(g,
                                0, /* No Special flags */
                                ctxsw_prog_fecs_header_v(),
@@ -111,20 +111,50 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
 int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
 {
-        struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+        struct ctx_header_desc *ctx = &c->ctx_header;
        struct nvgpu_mem *gr_mem;
        struct gk20a *g = c->g;
        int ret = 0;
        u32 addr_lo, addr_hi;
+        struct tsg_gk20a *tsg;
+        struct nvgpu_gr_ctx *gr_ctx;
-        addr_lo = u64_lo32(gpu_va);
+        tsg = tsg_gk20a_from_ch(c);
-        addr_hi = u64_hi32(gpu_va);
+        if (!tsg)
+                return -EINVAL;
+        gr_ctx = &tsg->gr_ctx;
        gr_mem = &ctx->mem;
        g->ops.mm.l2_flush(g, true);
        if (nvgpu_mem_begin(g, gr_mem))
                return -ENOMEM;
+        /* set priv access map */
+        addr_lo = u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
+        addr_hi = u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
+        nvgpu_mem_wr(g, gr_mem,
+                ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
+                addr_lo);
+        nvgpu_mem_wr(g, gr_mem,
+                ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
+                addr_hi);
+        addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
+        addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
+        nvgpu_mem_wr(g, gr_mem,
+                ctxsw_prog_main_image_patch_adr_lo_o(),
+                addr_lo);
+        nvgpu_mem_wr(g, gr_mem,
+                ctxsw_prog_main_image_patch_adr_hi_o(),
+                addr_hi);
+        g->ops.gr.write_pm_ptr(g, gr_mem, gr_ctx->pm_ctx.mem.gpu_va);
+        g->ops.gr.write_zcull_ptr(g, gr_mem, gr_ctx->zcull_ctx.gpu_va);
+        addr_lo = u64_lo32(gpu_va);
+        addr_hi = u64_hi32(gpu_va);
        nvgpu_mem_wr(g, gr_mem,
                ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi);
        nvgpu_mem_wr(g, gr_mem,