author     Terje Bergstrom <tbergstrom@nvidia.com>  2017-12-15 12:04:15 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-01-17 15:29:09 -0500
commit     2f6698b863c9cc1db6455637b7c72e812b470b93 (patch)
tree       d0c8abf32d6994b9f54bf5eddafd8316e038c829 /drivers/gpu/nvgpu/gv11b
parent     6a73114788ffafe4c53771c707ecbd9c9ea0a117 (diff)
gpu: nvgpu: Make graphics context property of TSG
Move graphics context ownership to the TSG instead of the channel. Combine
channel_ctx_gk20a and gr_ctx_desc into one structure, because the split
between them was arbitrary. Move the context header to be a property of
the channel.

Bug 1842197

Change-Id: I410e3262f80b318d8528bcbec270b63a2d8d2ff9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1639532
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
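For orientation before reading the diff: a minimal sketch of the access pattern
this change establishes, not part of the commit itself. tsg_gk20a_from_ch(),
tsg->gr_ctx and the per-channel ctx_header are the identifiers used in the diff
below; the wrapper function name is hypothetical.

	/* Hypothetical helper, for illustration only: how callers reach the
	 * graphics context after this change. Before it, they read the
	 * channel-owned ch->ch_ctx.gr_ctx pointer directly. */
	static struct nvgpu_gr_ctx *gr_ctx_from_ch_sketch(struct channel_gk20a *ch)
	{
		struct tsg_gk20a *tsg;

		tsg = tsg_gk20a_from_ch(ch);
		if (!tsg)
			return NULL;	/* channel not bound to a TSG */

		/* The graphics context is now embedded in the TSG; only the
		 * small subcontext header stays per-channel (ch->ctx_header). */
		return &tsg->gr_ctx;
	}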
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r--  drivers/gpu/nvgpu/gv11b/gr_gv11b.c      66
-rw-r--r--  drivers/gpu/nvgpu/gv11b/gr_gv11b.h       9
-rw-r--r--  drivers/gpu/nvgpu/gv11b/hal_gv11b.c      1
-rw-r--r--  drivers/gpu/nvgpu/gv11b/subctx_gv11b.c  42
4 files changed, 83 insertions, 35 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index d5924169..3030def8 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1373,7 +1373,7 @@ fail_free:
 }
 
 int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
-		struct gr_ctx_desc *gr_ctx,
+		struct nvgpu_gr_ctx *gr_ctx,
 		struct vm_gk20a *vm, u32 class,
 		u32 graphics_preempt_mode,
 		u32 compute_preempt_mode)
@@ -1497,13 +1497,13 @@ fail:
 }
 
 void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
+		struct channel_gk20a *c,
 		struct nvgpu_mem *mem)
 {
-	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
-	struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct nvgpu_mem *ctxheader = &ctx->mem;
-
 	u32 gfxp_preempt_option =
 		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
 	u32 cilp_preempt_option =
@@ -1514,6 +1514,12 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return;
+
+	gr_ctx = &tsg->gr_ctx;
+
 	if (gr_ctx->graphics_preempt_mode ==
 			NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
 		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
@@ -1552,7 +1558,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
 				gr_ctx->preempt_ctxsw_buffer.gpu_va);
 	}
 
-	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
+	err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true);
 	if (err) {
 		nvgpu_err(g, "can't map patch context");
 		goto out;
@@ -1564,7 +1570,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
 		(32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
 
 	gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
-	g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
+	g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, true);
 
 	addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >>
 		gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
@@ -1575,7 +1581,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
 	if (size == g->ops.gr.pagepool_default_size(g))
 		size = gr_scc_pagepool_total_pages_hwmax_v();
 
-	g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
+	g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, true);
 
 	addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >>
 		gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
@@ -1584,28 +1590,28 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
 	size = gr_ctx->spill_ctxsw_buffer.size /
 		gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx,
+	gr_gk20a_ctx_patch_write(g, gr_ctx,
 			gr_gpc0_swdx_rm_spill_buffer_addr_r(),
 			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
 			true);
-	gr_gk20a_ctx_patch_write(g, ch_ctx,
+	gr_gk20a_ctx_patch_write(g, gr_ctx,
 			gr_gpc0_swdx_rm_spill_buffer_size_r(),
 			gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
 			true);
 
 	cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
-	gr_gk20a_ctx_patch_write(g, ch_ctx,
+	gr_gk20a_ctx_patch_write(g, gr_ctx,
 			gr_gpcs_swdx_beta_cb_ctrl_r(),
 			gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
 				cbes_reserve),
 			true);
-	gr_gk20a_ctx_patch_write(g, ch_ctx,
+	gr_gk20a_ctx_patch_write(g, gr_ctx,
 			gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
 			gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
 				cbes_reserve),
 			true);
 
-	gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
+	gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
 	}
 
 out:
@@ -1902,10 +1908,9 @@ int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms,
 }
 
 void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
+		struct nvgpu_gr_ctx *gr_ctx,
 		u64 addr, bool patch)
 {
-	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
 	int attrBufferSize;
 
 	if (gr_ctx->preempt_ctxsw_buffer.gpu_va)
@@ -1915,16 +1920,16 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
 
 	attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
 
-	gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch);
+	gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
 		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
 		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
 		gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
 		gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) |
 		gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
 }
@@ -2042,6 +2047,7 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
 	u32 offset = gk20a_gr_gpc_offset(g, gpc) +
 			gk20a_gr_tpc_offset(g, tpc) +
 			gv11b_gr_sm_offset(g, sm);
+	struct tsg_gk20a *tsg;
 
 	*early_exit = false;
 	*ignore_debugger = false;
@@ -2054,9 +2060,14 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
 		return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm,
 				warp_esr, fault_ch);
 
-	if (fault_ch)
-		cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode ==
+	if (fault_ch) {
+		tsg = tsg_gk20a_from_ch(fault_ch);
+		if (!tsg)
+			return -EINVAL;
+
+		cilp_enabled = (tsg->gr_ctx.compute_preempt_mode ==
 			NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
+	}
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
 		"SM Exception received on gpc %d tpc %d sm %d = 0x%08x",
@@ -2509,7 +2520,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 	if (err)
 		return err;
 
-	ctx = &c->ch_ctx.ctx_header;
+	ctx = &c->ctx_header;
 	addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v();
 	addr_hi = u64_hi32(ctx->mem.gpu_va);
 
@@ -2529,7 +2540,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 
 int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
 {
-	struct channel_ctx_gk20a *ch_ctx = NULL;
+	struct nvgpu_gr_ctx *ch_ctx = NULL;
 	u32 pd_ab_dist_cfg0;
 	u32 ds_debug;
 	u32 mpc_vtg_debug;
@@ -2836,11 +2847,18 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g,
 		struct channel_gk20a *ch, u32 sm_id,
 		struct nvgpu_gr_sm_error_state *sm_error_state)
 {
+	struct tsg_gk20a *tsg;
 	u32 gpc, tpc, sm, offset;
 	struct gr_gk20a *gr = &g->gr;
-	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+	struct nvgpu_gr_ctx *ch_ctx;
 	int err = 0;
 
+	tsg = tsg_gk20a_from_ch(ch);
+	if (!tsg)
+		return -EINVAL;
+
+	ch_ctx = &tsg->gr_ctx;
+
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
 	gr->sm_error_states[sm_id].hww_global_esr =
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index b69e69bd..022a7698 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -41,9 +41,10 @@ struct zbc_s_table {
 };
 
 struct gk20a;
+struct gr_gk20a;
 struct zbc_entry;
 struct zbc_query_params;
-struct channel_ctx_gk20a;
+struct nvgpu_gr_ctx;
 struct nvgpu_warpstate;
 struct nvgpu_gr_sm_error_state;
 struct gr_ctx_desc;
@@ -128,7 +129,7 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
 int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms,
 			u32 expect_delay);
 void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
+		struct nvgpu_gr_ctx *ch_ctx,
 		u64 addr, bool patch);
 void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
 void gr_gv11b_get_access_map(struct gk20a *g,
@@ -222,13 +223,13 @@ unsigned long gr_gv11b_get_max_gfxp_wfi_timeout_count(struct gk20a *g);
 void gr_gv11b_ecc_init_scrub_reg(struct gk20a *g);
 
 int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
-		struct gr_ctx_desc *gr_ctx,
+		struct nvgpu_gr_ctx *gr_ctx,
 		struct vm_gk20a *vm, u32 class,
 		u32 graphics_preempt_mode,
 		u32 compute_preempt_mode);
 
 void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
+		struct channel_gk20a *ch_ctx,
 		struct nvgpu_mem *mem);
 
 #endif
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index aa3d52af..0a552f5b 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -272,7 +272,6 @@ static const struct gpu_ops gv11b_ops = {
 		.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
 		.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
 		.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
-		.free_channel_ctx = gk20a_free_channel_ctx,
 		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
 		.get_zcull_info = gr_gk20a_get_zcull_info,
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
index fe1aa8a5..607fff91 100644
--- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -43,7 +43,7 @@ static void gv11b_subctx_commit_pdb(struct channel_gk20a *c,
 
 void gv11b_free_subctx_header(struct channel_gk20a *c)
 {
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct gk20a *g = c->g;
 
 	nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header");
@@ -57,13 +57,13 @@ void gv11b_free_subctx_header(struct channel_gk20a *c)
 
 int gv11b_alloc_subctx_header(struct channel_gk20a *c)
 {
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct gk20a *g = c->g;
 	int ret = 0;
 
 	nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header");
 
-	if (ctx->mem.gpu_va == 0) {
+	if (!nvgpu_mem_is_valid(&ctx->mem)) {
 		ret = nvgpu_dma_alloc_flags_sys(g,
 				0, /* No Special flags */
 				ctxsw_prog_fecs_header_v(),
@@ -111,20 +111,50 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
 
 int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
 {
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct nvgpu_mem *gr_mem;
 	struct gk20a *g = c->g;
 	int ret = 0;
 	u32 addr_lo, addr_hi;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx;
 
-	addr_lo = u64_lo32(gpu_va);
-	addr_hi = u64_hi32(gpu_va);
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
 
 	gr_mem = &ctx->mem;
 	g->ops.mm.l2_flush(g, true);
 	if (nvgpu_mem_begin(g, gr_mem))
 		return -ENOMEM;
 
+	/* set priv access map */
+	addr_lo = u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
+	addr_hi = u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
+	nvgpu_mem_wr(g, gr_mem,
+		ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
+		addr_lo);
+	nvgpu_mem_wr(g, gr_mem,
+		ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
+		addr_hi);
+
+	addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
+	addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
+	nvgpu_mem_wr(g, gr_mem,
+		ctxsw_prog_main_image_patch_adr_lo_o(),
+		addr_lo);
+	nvgpu_mem_wr(g, gr_mem,
+		ctxsw_prog_main_image_patch_adr_hi_o(),
+		addr_hi);
+
+	g->ops.gr.write_pm_ptr(g, gr_mem, gr_ctx->pm_ctx.mem.gpu_va);
+	g->ops.gr.write_zcull_ptr(g, gr_mem, gr_ctx->zcull_ctx.gpu_va);
+
+	addr_lo = u64_lo32(gpu_va);
+	addr_hi = u64_hi32(gpu_va);
+
 	nvgpu_mem_wr(g, gr_mem,
 		ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi);
 	nvgpu_mem_wr(g, gr_mem,