author	Terje Bergstrom <tbergstrom@nvidia.com>	2017-12-15 12:04:15 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-01-17 15:29:09 -0500
commit	2f6698b863c9cc1db6455637b7c72e812b470b93 (patch)
tree	d0c8abf32d6994b9f54bf5eddafd8316e038c829 /drivers/gpu/nvgpu/gp10b/gr_gp10b.c
parent	6a73114788ffafe4c53771c707ecbd9c9ea0a117 (diff)
gpu: nvgpu: Make graphics context property of TSG
Move graphics context ownership to TSG instead of channel. Combine
channel_ctx_gk20a and gr_ctx_desc to one structure, because the split
between them was arbitrary. Move context header to be property of
channel.

Bug 1842197

Change-Id: I410e3262f80b318d8528bcbec270b63a2d8d2ff9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1639532
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/gr_gp10b.c')
-rw-r--r--	drivers/gpu/nvgpu/gp10b/gr_gp10b.c	202
1 file changed, 125 insertions(+), 77 deletions(-)
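Every function touched in this file follows the same refactoring pattern: the graphics context is no longer reached through ch->ch_ctx, but looked up through the channel's TSG, and the patch-write helpers now take the nvgpu_gr_ctx directly. A minimal sketch of that pattern, condensed from the hunks below (the wrapper function name is hypothetical; return values and the void variants follow each real call site's own convention):

/* Illustrative only: a hypothetical helper showing the lookup pattern
 * this change introduces, condensed from the hunks below. */
static int example_patch_with_tsg_ctx(struct gk20a *g, struct channel_gk20a *c)
{
	struct tsg_gk20a *tsg;
	struct nvgpu_gr_ctx *gr_ctx;

	/* The graphics context is now owned by the TSG, not the channel. */
	tsg = tsg_gk20a_from_ch(c);
	if (!tsg)
		return -EINVAL;

	gr_ctx = &tsg->gr_ctx;

	/* Patch writes take the nvgpu_gr_ctx directly instead of the old
	 * channel_ctx_gk20a wrapper. */
	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_beta_r(),
		g->gr.attrib_cb_default_size, true);

	return 0;
}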
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 56acc732..549a4da4 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -389,9 +389,9 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
 int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 			struct channel_gk20a *c, bool patch)
 {
+	struct tsg_gk20a *tsg;
 	struct gr_gk20a *gr = &g->gr;
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+	struct nvgpu_gr_ctx *gr_ctx;
 	u32 attrib_offset_in_chunk = 0;
 	u32 alpha_offset_in_chunk = 0;
 	u32 pd_ab_max_output;
@@ -405,6 +405,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
+
 	if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
 		attrib_size_in_chunk = gr->attrib_cb_gfxp_size;
 		cb_attrib_cache_size_init = gr->attrib_cb_gfxp_default_size;
@@ -413,9 +419,9 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 		cb_attrib_cache_size_init = gr->attrib_cb_default_size;
 	}
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_beta_r(),
 		gr->attrib_cb_default_size, patch);
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_alpha_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(),
 		gr->alpha_cb_default_size, patch);
 
 	pd_ab_max_output = (gr->alpha_cb_default_size *
@@ -423,11 +429,11 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 		gr_pd_ab_dist_cfg1_max_output_granularity_v();
 
 	if (g->gr.pd_max_batches) {
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
+		gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(),
 			gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
 			gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches), patch);
 	} else {
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
+		gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(),
 			gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
 			gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
 	}
@@ -447,17 +453,17 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 			cbm_cfg_size_steadystate = gr->attrib_cb_default_size *
 				gr->pes_tpc_count[ppc_index][gpc_index];
 
-			gr_gk20a_ctx_patch_write(g, ch_ctx,
+			gr_gk20a_ctx_patch_write(g, gr_ctx,
 				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
 				ppc_in_gpc_stride * ppc_index,
 				cbm_cfg_size_beta, patch);
 
-			gr_gk20a_ctx_patch_write(g, ch_ctx,
+			gr_gk20a_ctx_patch_write(g, gr_ctx,
 				gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
 				ppc_in_gpc_stride * ppc_index,
 				attrib_offset_in_chunk, patch);
 
-			gr_gk20a_ctx_patch_write(g, ch_ctx,
+			gr_gk20a_ctx_patch_write(g, gr_ctx,
 				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
 				ppc_in_gpc_stride * ppc_index,
 				cbm_cfg_size_steadystate,
@@ -466,12 +472,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 			attrib_offset_in_chunk += attrib_size_in_chunk *
 				gr->pes_tpc_count[ppc_index][gpc_index];
 
-			gr_gk20a_ctx_patch_write(g, ch_ctx,
+			gr_gk20a_ctx_patch_write(g, gr_ctx,
 				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
 				ppc_in_gpc_stride * ppc_index,
 				cbm_cfg_size_alpha, patch);
 
-			gr_gk20a_ctx_patch_write(g, ch_ctx,
+			gr_gk20a_ctx_patch_write(g, gr_ctx,
 				gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
 				ppc_in_gpc_stride * ppc_index,
 				alpha_offset_in_chunk, patch);
@@ -479,7 +485,7 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 			alpha_offset_in_chunk += gr->alpha_cb_size *
 				gr->pes_tpc_count[ppc_index][gpc_index];
 
-			gr_gk20a_ctx_patch_write(g, ch_ctx,
+			gr_gk20a_ctx_patch_write(g, gr_ctx,
 				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
 				gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate),
 				patch);
@@ -490,20 +496,20 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 }
 
 void gr_gp10b_commit_global_pagepool(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
+		struct nvgpu_gr_ctx *gr_ctx,
 		u64 addr, u32 size, bool patch)
 {
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(),
 		gr_scc_pagepool_base_addr_39_8_f(addr), patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(),
 		gr_scc_pagepool_total_pages_f(size) |
 		gr_scc_pagepool_valid_true_f(), patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(),
 		gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(),
 		gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
 }
 
@@ -947,7 +953,7 @@ fail_free:
 }
 
 int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
-		struct gr_ctx_desc *gr_ctx,
+		struct nvgpu_gr_ctx *gr_ctx,
 		struct vm_gk20a *vm, u32 class,
 		u32 graphics_preempt_mode,
 		u32 compute_preempt_mode)
@@ -1071,7 +1077,7 @@ fail:
 }
 
 int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
-		struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
+		struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
 		u32 class,
 		u32 flags)
 {
@@ -1085,7 +1091,7 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 	if (err)
 		return err;
 
-	(*gr_ctx)->ctx_id_valid = false;
+	gr_ctx->ctx_id_valid = false;
 
 	if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP)
 		graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
@@ -1094,7 +1100,7 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 
 	if (graphics_preempt_mode || compute_preempt_mode) {
 		if (g->ops.gr.set_ctxsw_preemption_mode) {
-			err = g->ops.gr.set_ctxsw_preemption_mode(g, *gr_ctx, vm,
+			err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm,
 				class, graphics_preempt_mode, compute_preempt_mode);
 			if (err) {
 				nvgpu_err(g, "set_ctxsw_preemption_mode failed");
@@ -1109,14 +1115,13 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 	return 0;
 
 fail_free_gk20a_ctx:
-	gr_gk20a_free_gr_ctx(g, vm, *gr_ctx);
-	*gr_ctx = NULL;
+	gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
 
 	return err;
 }
 
 void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
-		struct gr_ctx_desc *gr_ctx)
+		struct nvgpu_gr_ctx *gr_ctx)
 {
 	struct nvgpu_mem *mem = &gr_ctx->mem;
 
@@ -1168,13 +1173,13 @@ void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
 }
 
 void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
+		struct channel_gk20a *c,
 		struct nvgpu_mem *mem)
 {
-	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
-	struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct nvgpu_mem *ctxheader = &ctx->mem;
-
 	u32 gfxp_preempt_option =
 		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
 	u32 cilp_preempt_option =
@@ -1185,6 +1190,12 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return;
+
+	gr_ctx = &tsg->gr_ctx;
+
 	if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
 		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
 		nvgpu_mem_wr(g, mem,
@@ -1220,7 +1231,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 				gr_ctx->preempt_ctxsw_buffer.gpu_va);
 		}
 
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
+		err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true);
 		if (err) {
 			nvgpu_err(g, "can't map patch context");
 			goto out;
@@ -1232,7 +1243,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 			(32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
 
 		gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
-		g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
+		g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, true);
 
 		addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >>
 			gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
@@ -1243,7 +1254,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		if (size == g->ops.gr.pagepool_default_size(g))
 			size = gr_scc_pagepool_total_pages_hwmax_v();
 
-		g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
+		g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, true);
 
 		addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >>
 			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
@@ -1252,28 +1263,28 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		size = gr_ctx->spill_ctxsw_buffer.size /
 			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
 
-		gr_gk20a_ctx_patch_write(g, ch_ctx,
+		gr_gk20a_ctx_patch_write(g, gr_ctx,
 			gr_gpc0_swdx_rm_spill_buffer_addr_r(),
 			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
 			true);
-		gr_gk20a_ctx_patch_write(g, ch_ctx,
+		gr_gk20a_ctx_patch_write(g, gr_ctx,
 			gr_gpc0_swdx_rm_spill_buffer_size_r(),
 			gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
 			true);
 
 		cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
-		gr_gk20a_ctx_patch_write(g, ch_ctx,
+		gr_gk20a_ctx_patch_write(g, gr_ctx,
 			gr_gpcs_swdx_beta_cb_ctrl_r(),
 			gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
 				cbes_reserve),
 			true);
-		gr_gk20a_ctx_patch_write(g, ch_ctx,
+		gr_gk20a_ctx_patch_write(g, gr_ctx,
 			gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
 			gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
 				cbes_reserve),
 			true);
 
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
+		gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
 	}
 
 out:
@@ -1478,10 +1489,9 @@ int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
 }
 
 void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
+		struct nvgpu_gr_ctx *gr_ctx,
 		u64 addr, bool patch)
 {
-	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
 	int attrBufferSize;
 
 	if (gr_ctx->preempt_ctxsw_buffer.gpu_va)
@@ -1491,37 +1501,37 @@ void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
 
 	attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
 
-	gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch);
+	gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
 		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
 		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
 		gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
 		gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) |
 		gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
 }
 
 void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
+		struct nvgpu_gr_ctx *gr_ctx,
 		u64 addr, u64 size, bool patch)
 {
 	u32 data;
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(),
 		gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_size_r(),
 		gr_scc_bundle_cb_size_div_256b_f(size) |
 		gr_scc_bundle_cb_size_valid_true_f(), patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
 		gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
 		gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
 		gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
 
@@ -1535,7 +1545,7 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
 	gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
 		g->gr.bundle_cb_token_limit, data);
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(),
 		gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
 		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
 }
@@ -1706,14 +1716,17 @@ int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g,
 		struct channel_gk20a *fault_ch)
 {
 	int ret;
-	struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
 	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
 
-	if (!gr_ctx)
+	tsg = tsg_gk20a_from_ch(fault_ch);
+	if (!tsg)
 		return -EINVAL;
 
+	gr_ctx = &tsg->gr_ctx;
+
 	if (gr_ctx->cilp_preempt_pending) {
 		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
 			"CILP is already pending for chid %d",
@@ -1783,13 +1796,17 @@ int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g,
 static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g,
 		struct channel_gk20a *fault_ch)
 {
-	struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
 
-	if (!gr_ctx)
+	tsg = tsg_gk20a_from_ch(fault_ch);
+	if (!tsg)
 		return -EINVAL;
 
+	gr_ctx = &tsg->gr_ctx;
+
 	/* The ucode is self-clearing, so all we need to do here is
 	   to clear cilp_preempt_pending. */
 	if (!gr_ctx->cilp_preempt_pending) {
@@ -1820,13 +1837,19 @@ int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
+	struct tsg_gk20a *tsg;
 
 	*early_exit = false;
 	*ignore_debugger = false;
 
-	if (fault_ch)
-		cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode ==
+	if (fault_ch) {
+		tsg = tsg_gk20a_from_ch(fault_ch);
+		if (!tsg)
+			return -EINVAL;
+
+		cilp_enabled = (tsg->gr_ctx.compute_preempt_mode ==
 				NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
+	}
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n",
 		gpc, tpc, global_esr);
@@ -1911,8 +1934,9 @@ int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
 
 static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
 {
-	struct gr_ctx_desc *gr_ctx;
+	struct nvgpu_gr_ctx *gr_ctx;
 	struct channel_gk20a *ch;
+	struct tsg_gk20a *tsg;
 	int chid;
 	int ret = -EINVAL;
 
@@ -1922,7 +1946,11 @@ static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
 	if (!ch)
 		return ret;
 
-	gr_ctx = ch->ch_ctx.gr_ctx;
+	tsg = tsg_gk20a_from_ch(ch);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
 
 	if (gr_ctx->cilp_preempt_pending) {
 		*__chid = chid;
@@ -2022,11 +2050,17 @@ static bool gr_gp10b_suspend_context(struct channel_gk20a *ch,
 		bool *cilp_preempt_pending)
 {
 	struct gk20a *g = ch->g;
-	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
-	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx;
 	bool ctx_resident = false;
 	int err = 0;
 
+	tsg = tsg_gk20a_from_ch(ch);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
+
 	*cilp_preempt_pending = false;
 
 	if (gk20a_is_channel_ctx_resident(ch)) {
@@ -2097,15 +2131,22 @@ int gr_gp10b_suspend_contexts(struct gk20a *g,
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 
 	if (cilp_preempt_pending_ch) {
-		struct channel_ctx_gk20a *ch_ctx =
-			&cilp_preempt_pending_ch->ch_ctx;
-		struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+		struct tsg_gk20a *tsg;
+		struct nvgpu_gr_ctx *gr_ctx;
 		struct nvgpu_timeout timeout;
 
 		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
 			"CILP preempt pending, waiting %lu msecs for preemption",
 			gk20a_get_gr_idle_timeout(g));
 
+		tsg = tsg_gk20a_from_ch(cilp_preempt_pending_ch);
+		if (!tsg) {
+			err = -EINVAL;
+			goto clean_up;
+		}
+
+		gr_ctx = &tsg->gr_ctx;
+
 		nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
 				   NVGPU_TIMER_CPU_TIMER);
 		do {
@@ -2130,12 +2171,19 @@ clean_up:
 int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
 		bool boost)
 {
-	struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx;
 	struct gk20a *g = ch->g;
-	struct nvgpu_mem *mem = &gr_ctx->mem;
+	struct nvgpu_mem *mem;
 	int err = 0;
 
+	tsg = tsg_gk20a_from_ch(ch);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
 	gr_ctx->boosted_ctx = boost;
+	mem = &gr_ctx->mem;
 
 	if (nvgpu_mem_begin(g, mem))
 		return -ENOMEM;
@@ -2162,7 +2210,7 @@ unmap_ctx:
 }
 
 void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
-		struct gr_ctx_desc *gr_ctx) {
+		struct nvgpu_gr_ctx *gr_ctx) {
 	u32 v;
 
 	v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f(
@@ -2174,13 +2222,12 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 		u32 graphics_preempt_mode,
 		u32 compute_preempt_mode)
 {
-	struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx;
-	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+	struct nvgpu_gr_ctx *gr_ctx;
 	struct gk20a *g = ch->g;
 	struct tsg_gk20a *tsg;
 	struct vm_gk20a *vm;
-	struct nvgpu_mem *mem = &gr_ctx->mem;
-	struct ctx_header_desc *ctx = &ch->ch_ctx.ctx_header;
+	struct nvgpu_mem *mem;
+	struct ctx_header_desc *ctx = &ch->ctx_header;
 	struct nvgpu_mem *ctxheader = &ctx->mem;
 	u32 class;
 	int err = 0;
@@ -2189,12 +2236,13 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 	if (!class)
 		return -EINVAL;
 
-	if (gk20a_is_channel_marked_as_tsg(ch)) {
-		tsg = &g->fifo.tsg[ch->tsgid];
-		vm = tsg->vm;
-	} else {
-		vm = ch->vm;
-	}
+	tsg = tsg_gk20a_from_ch(ch);
+	if (!tsg)
+		return -EINVAL;
+
+	vm = tsg->vm;
+	gr_ctx = &tsg->gr_ctx;
+	mem = &gr_ctx->mem;
 
 	/* skip setting anything if both modes are already set */
 	if (graphics_preempt_mode &&
@@ -2241,15 +2289,15 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 
 	if (g->ops.gr.update_ctxsw_preemption_mode) {
 		g->ops.gr.update_ctxsw_preemption_mode(ch->g,
-				ch_ctx, mem);
+				ch, mem);
 
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
+		err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true);
 		if (err) {
 			nvgpu_err(g, "can't map patch context");
 			goto enable_ch;
 		}
 		g->ops.gr.commit_global_cb_manager(g, ch, true);
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
+		gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
 	}
 
 enable_ch: