author     Srirangan <smadhavan@nvidia.com>	2018-08-12 03:43:36 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>	2018-09-06 05:28:48 -0400
commit     bc1ea8c9bfdd8614af3f282a52cbb1b74c4d9544 (patch)
tree       aa6e34eb6672a8d53498d8c11cbec446182fd5c7 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent     6227e003920ae3bdcf8b0731fbd342158378958a (diff)
nvgpu: gk20a: gr: Fix MISRA 15.6 violations
MISRA Rule 15.6 requires that all if-else blocks be enclosed in braces, including single-statement blocks. Fix errors due to single-statement if blocks without braces by introducing the braces.

JIRA NVGPU-671

Change-Id: Ie4bd8bffdafe6321e35394558dc9559f9c2d05c2
Signed-off-by: Srirangan <smadhavan@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1797689
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
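The change applied throughout the diff below is mechanical: every single-statement if, else if and else body gains braces so the controlled code is always a compound statement, which is what MISRA C:2012 Rule 15.6 requires. A minimal stand-alone sketch of the before/after pattern follows; it assumes nothing from the driver itself, and the check_state() helper is purely illustrative.

/* misra_15_6_sketch.c - illustrative only, not part of nvgpu. */
#include <stdio.h>

/* Hypothetical helper standing in for a driver call such as the ones patched below. */
static int check_state(int err)
{
	/*
	 * Before this commit the driver used the single-statement form:
	 *
	 *	if (err)
	 *		return err;
	 *
	 * That is legal C, but it violates MISRA C:2012 Rule 15.6 because
	 * the body of the if is not a compound statement.
	 */

	/* After: the body is wrapped in braces, which is all Rule 15.6 asks for. */
	if (err) {
		return err;
	}

	return 0;
}

int main(void)
{
	printf("check_state(0)   -> %d\n", check_state(0));
	printf("check_state(-22) -> %d\n", check_state(-22));
	return 0;
}

Beyond satisfying the rule, bracing every branch keeps a later one-line addition from silently falling outside the intended block, which is the usual practical argument for this kind of cleanup.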
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 896
1 file changed, 587 insertions(+), 309 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 7bfc48ad..91ffbb7e 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -103,8 +103,9 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
103 struct nvgpu_mem *mem = NULL; 103 struct nvgpu_mem *mem = NULL;
104 104
105 tsg = tsg_gk20a_from_ch(c); 105 tsg = tsg_gk20a_from_ch(c);
106 if (!tsg) 106 if (!tsg) {
107 return -EINVAL; 107 return -EINVAL;
108 }
108 109
109 gr_ctx = &tsg->gr_ctx; 110 gr_ctx = &tsg->gr_ctx;
110 mem = &gr_ctx->mem; 111 mem = &gr_ctx->mem;
@@ -387,8 +388,9 @@ int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long duration_ms,
387 u32 delay = expect_delay; 388 u32 delay = expect_delay;
388 struct nvgpu_timeout timeout; 389 struct nvgpu_timeout timeout;
389 390
390 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) 391 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
391 return 0; 392 return 0;
393 }
392 394
393 nvgpu_log_fn(g, " "); 395 nvgpu_log_fn(g, " ");
394 396
@@ -424,41 +426,49 @@ int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
424 426
425 nvgpu_log_fn(g, " "); 427 nvgpu_log_fn(g, " ");
426 428
427 if (sleepduringwait) 429 if (sleepduringwait) {
428 delay = GR_IDLE_CHECK_DEFAULT; 430 delay = GR_IDLE_CHECK_DEFAULT;
431 }
429 432
430 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), 433 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
431 NVGPU_TIMER_CPU_TIMER); 434 NVGPU_TIMER_CPU_TIMER);
432 435
433 while (check == WAIT_UCODE_LOOP) { 436 while (check == WAIT_UCODE_LOOP) {
434 if (nvgpu_timeout_expired(&timeout)) 437 if (nvgpu_timeout_expired(&timeout)) {
435 check = WAIT_UCODE_TIMEOUT; 438 check = WAIT_UCODE_TIMEOUT;
439 }
436 440
437 reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id)); 441 reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id));
438 442
439 if (mailbox_ret) 443 if (mailbox_ret) {
440 *mailbox_ret = reg; 444 *mailbox_ret = reg;
445 }
441 446
442 switch (opc_success) { 447 switch (opc_success) {
443 case GR_IS_UCODE_OP_EQUAL: 448 case GR_IS_UCODE_OP_EQUAL:
444 if (reg == mailbox_ok) 449 if (reg == mailbox_ok) {
445 check = WAIT_UCODE_OK; 450 check = WAIT_UCODE_OK;
451 }
446 break; 452 break;
447 case GR_IS_UCODE_OP_NOT_EQUAL: 453 case GR_IS_UCODE_OP_NOT_EQUAL:
448 if (reg != mailbox_ok) 454 if (reg != mailbox_ok) {
449 check = WAIT_UCODE_OK; 455 check = WAIT_UCODE_OK;
456 }
450 break; 457 break;
451 case GR_IS_UCODE_OP_AND: 458 case GR_IS_UCODE_OP_AND:
452 if (reg & mailbox_ok) 459 if (reg & mailbox_ok) {
453 check = WAIT_UCODE_OK; 460 check = WAIT_UCODE_OK;
461 }
454 break; 462 break;
455 case GR_IS_UCODE_OP_LESSER: 463 case GR_IS_UCODE_OP_LESSER:
456 if (reg < mailbox_ok) 464 if (reg < mailbox_ok) {
457 check = WAIT_UCODE_OK; 465 check = WAIT_UCODE_OK;
466 }
458 break; 467 break;
459 case GR_IS_UCODE_OP_LESSER_EQUAL: 468 case GR_IS_UCODE_OP_LESSER_EQUAL:
460 if (reg <= mailbox_ok) 469 if (reg <= mailbox_ok) {
461 check = WAIT_UCODE_OK; 470 check = WAIT_UCODE_OK;
471 }
462 break; 472 break;
463 case GR_IS_UCODE_OP_SKIP: 473 case GR_IS_UCODE_OP_SKIP:
464 /* do no success check */ 474 /* do no success check */
@@ -473,24 +483,29 @@ int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
473 483
474 switch (opc_fail) { 484 switch (opc_fail) {
475 case GR_IS_UCODE_OP_EQUAL: 485 case GR_IS_UCODE_OP_EQUAL:
476 if (reg == mailbox_fail) 486 if (reg == mailbox_fail) {
477 check = WAIT_UCODE_ERROR; 487 check = WAIT_UCODE_ERROR;
488 }
478 break; 489 break;
479 case GR_IS_UCODE_OP_NOT_EQUAL: 490 case GR_IS_UCODE_OP_NOT_EQUAL:
480 if (reg != mailbox_fail) 491 if (reg != mailbox_fail) {
481 check = WAIT_UCODE_ERROR; 492 check = WAIT_UCODE_ERROR;
493 }
482 break; 494 break;
483 case GR_IS_UCODE_OP_AND: 495 case GR_IS_UCODE_OP_AND:
484 if (reg & mailbox_fail) 496 if (reg & mailbox_fail) {
485 check = WAIT_UCODE_ERROR; 497 check = WAIT_UCODE_ERROR;
498 }
486 break; 499 break;
487 case GR_IS_UCODE_OP_LESSER: 500 case GR_IS_UCODE_OP_LESSER:
488 if (reg < mailbox_fail) 501 if (reg < mailbox_fail) {
489 check = WAIT_UCODE_ERROR; 502 check = WAIT_UCODE_ERROR;
503 }
490 break; 504 break;
491 case GR_IS_UCODE_OP_LESSER_EQUAL: 505 case GR_IS_UCODE_OP_LESSER_EQUAL:
492 if (reg <= mailbox_fail) 506 if (reg <= mailbox_fail) {
493 check = WAIT_UCODE_ERROR; 507 check = WAIT_UCODE_ERROR;
508 }
494 break; 509 break;
495 case GR_IS_UCODE_OP_SKIP: 510 case GR_IS_UCODE_OP_SKIP:
496 /* do no check on fail*/ 511 /* do no check on fail*/
@@ -505,8 +520,9 @@ int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
505 if (sleepduringwait) { 520 if (sleepduringwait) {
506 nvgpu_usleep_range(delay, delay * 2); 521 nvgpu_usleep_range(delay, delay * 2);
507 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); 522 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
508 } else 523 } else {
509 nvgpu_udelay(delay); 524 nvgpu_udelay(delay);
525 }
510 } 526 }
511 527
512 if (check == WAIT_UCODE_TIMEOUT) { 528 if (check == WAIT_UCODE_TIMEOUT) {
@@ -539,9 +555,10 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
539 555
540 nvgpu_mutex_acquire(&gr->fecs_mutex); 556 nvgpu_mutex_acquire(&gr->fecs_mutex);
541 557
542 if (op.mailbox.id != 0) 558 if (op.mailbox.id != 0) {
543 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id), 559 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id),
544 op.mailbox.data); 560 op.mailbox.data);
561 }
545 562
546 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 563 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
547 gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr)); 564 gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr));
@@ -552,16 +569,18 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
552 569
553 /* op.mailbox.id == 4 cases require waiting for completion on 570 /* op.mailbox.id == 4 cases require waiting for completion on
554 * for op.mailbox.id == 0 */ 571 * for op.mailbox.id == 0 */
555 if (op.mailbox.id == 4) 572 if (op.mailbox.id == 4) {
556 op.mailbox.id = 0; 573 op.mailbox.id = 0;
574 }
557 575
558 ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret, 576 ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret,
559 op.cond.ok, op.mailbox.ok, 577 op.cond.ok, op.mailbox.ok,
560 op.cond.fail, op.mailbox.fail, 578 op.cond.fail, op.mailbox.fail,
561 sleepduringwait); 579 sleepduringwait);
562 if (ret) 580 if (ret) {
563 nvgpu_err(g,"fecs method: data=0x%08x push adr=0x%08x", 581 nvgpu_err(g,"fecs method: data=0x%08x push adr=0x%08x",
564 op.method.data, op.method.addr); 582 op.method.data, op.method.addr);
583 }
565 584
566 nvgpu_mutex_release(&gr->fecs_mutex); 585 nvgpu_mutex_release(&gr->fecs_mutex);
567 586
@@ -588,9 +607,10 @@ int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
588 op.cond.ok, op.mailbox.ok, 607 op.cond.ok, op.mailbox.ok,
589 op.cond.fail, op.mailbox.fail, 608 op.cond.fail, op.mailbox.fail,
590 false); 609 false);
591 if (ret) 610 if (ret) {
592 nvgpu_err(g,"fecs method: data=0x%08x push adr=0x%08x", 611 nvgpu_err(g,"fecs method: data=0x%08x push adr=0x%08x",
593 op.method.data, op.method.addr); 612 op.method.data, op.method.addr);
613 }
594 614
595 nvgpu_mutex_release(&gr->fecs_mutex); 615 nvgpu_mutex_release(&gr->fecs_mutex);
596 616
@@ -620,9 +640,10 @@ int gr_gk20a_disable_ctxsw(struct gk20a *g)
620 640
621 nvgpu_mutex_acquire(&g->ctxsw_disable_lock); 641 nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
622 g->ctxsw_disable_count++; 642 g->ctxsw_disable_count++;
623 if (g->ctxsw_disable_count == 1) 643 if (g->ctxsw_disable_count == 1) {
624 err = gr_gk20a_ctrl_ctxsw(g, 644 err = gr_gk20a_ctrl_ctxsw(g,
625 gr_fecs_method_push_adr_stop_ctxsw_v(), NULL); 645 gr_fecs_method_push_adr_stop_ctxsw_v(), NULL);
646 }
626 nvgpu_mutex_release(&g->ctxsw_disable_lock); 647 nvgpu_mutex_release(&g->ctxsw_disable_lock);
627 648
628 return err; 649 return err;
@@ -638,9 +659,10 @@ int gr_gk20a_enable_ctxsw(struct gk20a *g)
638 nvgpu_mutex_acquire(&g->ctxsw_disable_lock); 659 nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
639 g->ctxsw_disable_count--; 660 g->ctxsw_disable_count--;
640 WARN_ON(g->ctxsw_disable_count < 0); 661 WARN_ON(g->ctxsw_disable_count < 0);
641 if (g->ctxsw_disable_count == 0) 662 if (g->ctxsw_disable_count == 0) {
642 err = gr_gk20a_ctrl_ctxsw(g, 663 err = gr_gk20a_ctrl_ctxsw(g,
643 gr_fecs_method_push_adr_start_ctxsw_v(), NULL); 664 gr_fecs_method_push_adr_start_ctxsw_v(), NULL);
665 }
644 nvgpu_mutex_release(&g->ctxsw_disable_lock); 666 nvgpu_mutex_release(&g->ctxsw_disable_lock);
645 667
646 return err; 668 return err;
@@ -779,9 +801,10 @@ int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
779 .fail = 0x20, }, 801 .fail = 0x20, },
780 .cond.ok = GR_IS_UCODE_OP_AND, 802 .cond.ok = GR_IS_UCODE_OP_AND,
781 .cond.fail = GR_IS_UCODE_OP_AND}, true); 803 .cond.fail = GR_IS_UCODE_OP_AND}, true);
782 if (ret) 804 if (ret) {
783 nvgpu_err(g, 805 nvgpu_err(g,
784 "bind channel instance failed"); 806 "bind channel instance failed");
807 }
785 808
786 return ret; 809 return ret;
787} 810}
@@ -815,8 +838,9 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
815 nvgpu_log_fn(g, " "); 838 nvgpu_log_fn(g, " ");
816 839
817 tsg = tsg_gk20a_from_ch(c); 840 tsg = tsg_gk20a_from_ch(c);
818 if (!tsg) 841 if (!tsg) {
819 return -EINVAL; 842 return -EINVAL;
843 }
820 844
821 gr_ctx = &tsg->gr_ctx; 845 gr_ctx = &tsg->gr_ctx;
822 mem = &gr_ctx->mem; 846 mem = &gr_ctx->mem;
@@ -843,11 +867,12 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
843 ctxsw_prog_main_image_zcull_o(), 867 ctxsw_prog_main_image_zcull_o(),
844 gr_ctx->zcull_ctx.ctx_sw_mode); 868 gr_ctx->zcull_ctx.ctx_sw_mode);
845 869
846 if (ctxheader->gpu_va) 870 if (ctxheader->gpu_va) {
847 g->ops.gr.write_zcull_ptr(g, ctxheader, 871 g->ops.gr.write_zcull_ptr(g, ctxheader,
848 gr_ctx->zcull_ctx.gpu_va); 872 gr_ctx->zcull_ctx.gpu_va);
849 else 873 } else {
850 g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va); 874 g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va);
875 }
851 876
852 gk20a_enable_channel_tsg(g, c); 877 gk20a_enable_channel_tsg(g, c);
853 878
@@ -883,15 +908,17 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
883 nvgpu_log_fn(g, " "); 908 nvgpu_log_fn(g, " ");
884 909
885 tsg = tsg_gk20a_from_ch(c); 910 tsg = tsg_gk20a_from_ch(c);
886 if (!tsg) 911 if (!tsg) {
887 return -EINVAL; 912 return -EINVAL;
913 }
888 914
889 gr_ctx = &tsg->gr_ctx; 915 gr_ctx = &tsg->gr_ctx;
890 if (patch) { 916 if (patch) {
891 int err; 917 int err;
892 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); 918 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
893 if (err) 919 if (err) {
894 return err; 920 return err;
921 }
895 } 922 }
896 923
897 /* global pagepool buffer */ 924 /* global pagepool buffer */
@@ -903,8 +930,9 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
903 size = gr->global_ctx_buffer[PAGEPOOL].mem.size / 930 size = gr->global_ctx_buffer[PAGEPOOL].mem.size /
904 gr_scc_pagepool_total_pages_byte_granularity_v(); 931 gr_scc_pagepool_total_pages_byte_granularity_v();
905 932
906 if (size == g->ops.gr.pagepool_default_size(g)) 933 if (size == g->ops.gr.pagepool_default_size(g)) {
907 size = gr_scc_pagepool_total_pages_hwmax_v(); 934 size = gr_scc_pagepool_total_pages_hwmax_v();
935 }
908 936
909 nvgpu_log_info(g, "pagepool buffer addr : 0x%016llx, size : %d", 937 nvgpu_log_info(g, "pagepool buffer addr : 0x%016llx, size : %d",
910 addr, size); 938 addr, size);
@@ -934,8 +962,9 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
934 g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, patch); 962 g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, patch);
935 g->ops.gr.commit_global_cb_manager(g, c, patch); 963 g->ops.gr.commit_global_cb_manager(g, c, patch);
936 964
937 if (patch) 965 if (patch) {
938 gr_gk20a_ctx_patch_write_end(g, gr_ctx, false); 966 gr_gk20a_ctx_patch_write_end(g, gr_ctx, false);
967 }
939 968
940 return 0; 969 return 0;
941} 970}
@@ -996,8 +1025,9 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
996 */ 1025 */
997static u32 gr_gk20a_get_map_tile_count(struct gr_gk20a *gr, u32 index) 1026static u32 gr_gk20a_get_map_tile_count(struct gr_gk20a *gr, u32 index)
998{ 1027{
999 if (index >= gr->map_tile_count) 1028 if (index >= gr->map_tile_count) {
1000 return 0; 1029 return 0;
1030 }
1001 1031
1002 return gr->map_tiles[index]; 1032 return gr->map_tiles[index];
1003} 1033}
@@ -1008,8 +1038,9 @@ int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
1008 u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod; 1038 u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod;
1009 u32 map0, map1, map2, map3, map4, map5; 1039 u32 map0, map1, map2, map3, map4, map5;
1010 1040
1011 if (!gr->map_tiles) 1041 if (!gr->map_tiles) {
1012 return -1; 1042 return -1;
1043 }
1013 1044
1014 nvgpu_log_fn(g, " "); 1045 nvgpu_log_fn(g, " ");
1015 1046
@@ -1179,8 +1210,9 @@ int gr_gk20a_init_sm_id_table(struct gk20a *g)
1179 */ 1210 */
1180u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index) 1211u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index)
1181{ 1212{
1182 if (gpc_index >= gr->gpc_count) 1213 if (gpc_index >= gr->gpc_count) {
1183 return 0; 1214 return 0;
1215 }
1184 1216
1185 return gr->gpc_tpc_count[gpc_index]; 1217 return gr->gpc_tpc_count[gpc_index];
1186} 1218}
@@ -1199,12 +1231,14 @@ int gr_gk20a_init_fs_state(struct gk20a *g)
1199 1231
1200 if (g->ops.gr.init_sm_id_table) { 1232 if (g->ops.gr.init_sm_id_table) {
1201 err = g->ops.gr.init_sm_id_table(g); 1233 err = g->ops.gr.init_sm_id_table(g);
1202 if (err) 1234 if (err) {
1203 return err; 1235 return err;
1236 }
1204 1237
1205 /* Is table empty ? */ 1238 /* Is table empty ? */
1206 if (g->gr.no_of_sm == 0) 1239 if (g->gr.no_of_sm == 0) {
1207 return -EINVAL; 1240 return -EINVAL;
1241 }
1208 } 1242 }
1209 1243
1210 for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { 1244 for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
@@ -1213,8 +1247,9 @@ int gr_gk20a_init_fs_state(struct gk20a *g)
1213 1247
1214 g->ops.gr.program_sm_id_numbering(g, gpc_index, tpc_index, sm_id); 1248 g->ops.gr.program_sm_id_numbering(g, gpc_index, tpc_index, sm_id);
1215 1249
1216 if (g->ops.gr.program_active_tpc_counts) 1250 if (g->ops.gr.program_active_tpc_counts) {
1217 g->ops.gr.program_active_tpc_counts(g, gpc_index); 1251 g->ops.gr.program_active_tpc_counts(g, gpc_index);
1252 }
1218 } 1253 }
1219 1254
1220 for (reg_index = 0, gpc_id = 0; 1255 for (reg_index = 0, gpc_id = 0;
@@ -1237,8 +1272,9 @@ int gr_gk20a_init_fs_state(struct gk20a *g)
1237 1272
1238 /* gr__setup_pd_mapping stubbed for gk20a */ 1273 /* gr__setup_pd_mapping stubbed for gk20a */
1239 g->ops.gr.setup_rop_mapping(g, gr); 1274 g->ops.gr.setup_rop_mapping(g, gr);
1240 if (g->ops.gr.setup_alpha_beta_tables) 1275 if (g->ops.gr.setup_alpha_beta_tables) {
1241 g->ops.gr.setup_alpha_beta_tables(g, gr); 1276 g->ops.gr.setup_alpha_beta_tables(g, gr);
1277 }
1242 1278
1243 for (gpc_index = 0; 1279 for (gpc_index = 0;
1244 gpc_index < gr_pd_dist_skip_table__size_1_v() * 4; 1280 gpc_index < gr_pd_dist_skip_table__size_1_v() * 4;
@@ -1291,8 +1327,9 @@ int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
1291 .cond.fail = GR_IS_UCODE_OP_AND, 1327 .cond.fail = GR_IS_UCODE_OP_AND,
1292 }, true); 1328 }, true);
1293 1329
1294 if (ret) 1330 if (ret) {
1295 nvgpu_err(g, "save context image failed"); 1331 nvgpu_err(g, "save context image failed");
1332 }
1296 1333
1297 return ret; 1334 return ret;
1298} 1335}
@@ -1327,26 +1364,30 @@ u32 gk20a_init_sw_bundle(struct gk20a *g)
1327 err = gr_gk20a_wait_idle(g, 1364 err = gr_gk20a_wait_idle(g,
1328 gk20a_get_gr_idle_timeout(g), 1365 gk20a_get_gr_idle_timeout(g),
1329 GR_IDLE_CHECK_DEFAULT); 1366 GR_IDLE_CHECK_DEFAULT);
1330 if (err) 1367 if (err) {
1331 goto error; 1368 goto error;
1369 }
1332 } 1370 }
1333 1371
1334 err = gr_gk20a_wait_fe_idle(g, gk20a_get_gr_idle_timeout(g), 1372 err = gr_gk20a_wait_fe_idle(g, gk20a_get_gr_idle_timeout(g),
1335 GR_IDLE_CHECK_DEFAULT); 1373 GR_IDLE_CHECK_DEFAULT);
1336 if (err) 1374 if (err) {
1337 goto error; 1375 goto error;
1376 }
1338 } 1377 }
1339 1378
1340 if (!err && g->ops.gr.init_sw_veid_bundle) { 1379 if (!err && g->ops.gr.init_sw_veid_bundle) {
1341 err = g->ops.gr.init_sw_veid_bundle(g); 1380 err = g->ops.gr.init_sw_veid_bundle(g);
1342 if (err) 1381 if (err) {
1343 goto error; 1382 goto error;
1383 }
1344 } 1384 }
1345 1385
1346 if (g->ops.gr.init_sw_bundle64) { 1386 if (g->ops.gr.init_sw_bundle64) {
1347 err = g->ops.gr.init_sw_bundle64(g); 1387 err = g->ops.gr.init_sw_bundle64(g);
1348 if (err) 1388 if (err) {
1349 goto error; 1389 goto error;
1390 }
1350 } 1391 }
1351 1392
1352 /* disable pipe mode override */ 1393 /* disable pipe mode override */
@@ -1396,8 +1437,9 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1396 nvgpu_log_fn(g, " "); 1437 nvgpu_log_fn(g, " ");
1397 1438
1398 tsg = tsg_gk20a_from_ch(c); 1439 tsg = tsg_gk20a_from_ch(c);
1399 if (!tsg) 1440 if (!tsg) {
1400 return -EINVAL; 1441 return -EINVAL;
1442 }
1401 1443
1402 gr_ctx = &tsg->gr_ctx; 1444 gr_ctx = &tsg->gr_ctx;
1403 gr_mem = &gr_ctx->mem; 1445 gr_mem = &gr_ctx->mem;
@@ -1421,8 +1463,9 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1421 gr_fe_pwr_mode_req_send_f() | gr_fe_pwr_mode_mode_force_on_f()); 1463 gr_fe_pwr_mode_req_send_f() | gr_fe_pwr_mode_mode_force_on_f());
1422 do { 1464 do {
1423 u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r())); 1465 u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r()));
1424 if (req == gr_fe_pwr_mode_req_done_v()) 1466 if (req == gr_fe_pwr_mode_req_done_v()) {
1425 break; 1467 break;
1468 }
1426 nvgpu_udelay(FE_PWR_MODE_TIMEOUT_DEFAULT); 1469 nvgpu_udelay(FE_PWR_MODE_TIMEOUT_DEFAULT);
1427 } while (!nvgpu_timeout_expired_msg(&timeout, 1470 } while (!nvgpu_timeout_expired_msg(&timeout,
1428 "timeout forcing FE on")); 1471 "timeout forcing FE on"));
@@ -1467,8 +1510,9 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1467 1510
1468 do { 1511 do {
1469 u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r())); 1512 u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r()));
1470 if (req == gr_fe_pwr_mode_req_done_v()) 1513 if (req == gr_fe_pwr_mode_req_done_v()) {
1471 break; 1514 break;
1515 }
1472 nvgpu_udelay(FE_PWR_MODE_TIMEOUT_DEFAULT); 1516 nvgpu_udelay(FE_PWR_MODE_TIMEOUT_DEFAULT);
1473 } while (!nvgpu_timeout_expired_msg(&timeout, 1517 } while (!nvgpu_timeout_expired_msg(&timeout,
1474 "timeout setting FE power to auto")); 1518 "timeout setting FE power to auto"));
@@ -1479,8 +1523,9 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1479 gr_scc_init_ram_trigger_f()); 1523 gr_scc_init_ram_trigger_f());
1480 1524
1481 err = gr_gk20a_fecs_ctx_bind_channel(g, c); 1525 err = gr_gk20a_fecs_ctx_bind_channel(g, c);
1482 if (err) 1526 if (err) {
1483 goto clean_up; 1527 goto clean_up;
1528 }
1484 1529
1485 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), 1530 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1486 GR_IDLE_CHECK_DEFAULT); 1531 GR_IDLE_CHECK_DEFAULT);
@@ -1491,41 +1536,48 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1491 sw_ctx_load->l[i].value); 1536 sw_ctx_load->l[i].value);
1492 } 1537 }
1493 1538
1494 if (g->ops.gr.init_preemption_state) 1539 if (g->ops.gr.init_preemption_state) {
1495 g->ops.gr.init_preemption_state(g); 1540 g->ops.gr.init_preemption_state(g);
1541 }
1496 1542
1497 if (g->ops.clock_gating.blcg_gr_load_gating_prod) 1543 if (g->ops.clock_gating.blcg_gr_load_gating_prod) {
1498 g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled); 1544 g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled);
1545 }
1499 1546
1500 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), 1547 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1501 GR_IDLE_CHECK_DEFAULT); 1548 GR_IDLE_CHECK_DEFAULT);
1502 if (err) 1549 if (err) {
1503 goto clean_up; 1550 goto clean_up;
1551 }
1504 1552
1505 /* disable fe_go_idle */ 1553 /* disable fe_go_idle */
1506 gk20a_writel(g, gr_fe_go_idle_timeout_r(), 1554 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
1507 gr_fe_go_idle_timeout_count_disabled_f()); 1555 gr_fe_go_idle_timeout_count_disabled_f());
1508 1556
1509 err = g->ops.gr.commit_global_ctx_buffers(g, c, false); 1557 err = g->ops.gr.commit_global_ctx_buffers(g, c, false);
1510 if (err) 1558 if (err) {
1511 goto clean_up; 1559 goto clean_up;
1560 }
1512 1561
1513 /* override a few ctx state registers */ 1562 /* override a few ctx state registers */
1514 g->ops.gr.commit_global_timeslice(g, c); 1563 g->ops.gr.commit_global_timeslice(g, c);
1515 1564
1516 /* floorsweep anything left */ 1565 /* floorsweep anything left */
1517 err = g->ops.gr.init_fs_state(g); 1566 err = g->ops.gr.init_fs_state(g);
1518 if (err) 1567 if (err) {
1519 goto clean_up; 1568 goto clean_up;
1569 }
1520 1570
1521 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), 1571 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1522 GR_IDLE_CHECK_DEFAULT); 1572 GR_IDLE_CHECK_DEFAULT);
1523 if (err) 1573 if (err) {
1524 goto restore_fe_go_idle; 1574 goto restore_fe_go_idle;
1575 }
1525 1576
1526 err = gk20a_init_sw_bundle(g); 1577 err = gk20a_init_sw_bundle(g);
1527 if (err) 1578 if (err) {
1528 goto clean_up; 1579 goto clean_up;
1580 }
1529 1581
1530restore_fe_go_idle: 1582restore_fe_go_idle:
1531 /* restore fe_go_idle */ 1583 /* restore fe_go_idle */
@@ -1533,8 +1585,9 @@ restore_fe_go_idle:
1533 gr_fe_go_idle_timeout_count_prod_f()); 1585 gr_fe_go_idle_timeout_count_prod_f());
1534 1586
1535 if (err || gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), 1587 if (err || gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1536 GR_IDLE_CHECK_DEFAULT)) 1588 GR_IDLE_CHECK_DEFAULT)) {
1537 goto clean_up; 1589 goto clean_up;
1590 }
1538 1591
1539 /* load method init */ 1592 /* load method init */
1540 if (sw_method_init->count) { 1593 if (sw_method_init->count) {
@@ -1558,8 +1611,9 @@ restore_fe_go_idle:
1558 1611
1559 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), 1612 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
1560 GR_IDLE_CHECK_DEFAULT); 1613 GR_IDLE_CHECK_DEFAULT);
1561 if (err) 1614 if (err) {
1562 goto clean_up; 1615 goto clean_up;
1616 }
1563 1617
1564 ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); 1618 ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
1565 ctx_header_words >>= 2; 1619 ctx_header_words >>= 2;
@@ -1576,8 +1630,9 @@ restore_fe_go_idle:
1576 g->ops.gr.write_zcull_ptr(g, gold_mem, 0); 1630 g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
1577 1631
1578 err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); 1632 err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
1579 if (err) 1633 if (err) {
1580 goto clean_up; 1634 goto clean_up;
1635 }
1581 1636
1582 gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v()); 1637 gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v());
1583 1638
@@ -1599,8 +1654,9 @@ restore_fe_go_idle:
1599 } 1654 }
1600 1655
1601 err = g->ops.gr.commit_inst(c, gr_mem->gpu_va); 1656 err = g->ops.gr.commit_inst(c, gr_mem->gpu_va);
1602 if (err) 1657 if (err) {
1603 goto clean_up; 1658 goto clean_up;
1659 }
1604 1660
1605 gr->ctx_vars.golden_image_initialized = true; 1661 gr->ctx_vars.golden_image_initialized = true;
1606 1662
@@ -1608,10 +1664,11 @@ restore_fe_go_idle:
1608 gr_fecs_current_ctx_valid_false_f()); 1664 gr_fecs_current_ctx_valid_false_f());
1609 1665
1610clean_up: 1666clean_up:
1611 if (err) 1667 if (err) {
1612 nvgpu_err(g, "fail"); 1668 nvgpu_err(g, "fail");
1613 else 1669 } else {
1614 nvgpu_log_fn(g, "done"); 1670 nvgpu_log_fn(g, "done");
1671 }
1615 1672
1616 nvgpu_mutex_release(&gr->ctx_mutex); 1673 nvgpu_mutex_release(&gr->ctx_mutex);
1617 return err; 1674 return err;
@@ -1630,8 +1687,9 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1630 nvgpu_log_fn(g, " "); 1687 nvgpu_log_fn(g, " ");
1631 1688
1632 tsg = tsg_gk20a_from_ch(c); 1689 tsg = tsg_gk20a_from_ch(c);
1633 if (!tsg) 1690 if (!tsg) {
1634 return -EINVAL; 1691 return -EINVAL;
1692 }
1635 1693
1636 gr_ctx = &tsg->gr_ctx; 1694 gr_ctx = &tsg->gr_ctx;
1637 mem = &gr_ctx->mem; 1695 mem = &gr_ctx->mem;
@@ -1689,8 +1747,9 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1689 nvgpu_log_fn(g, " "); 1747 nvgpu_log_fn(g, " ");
1690 1748
1691 tsg = tsg_gk20a_from_ch(c); 1749 tsg = tsg_gk20a_from_ch(c);
1692 if (!tsg) 1750 if (!tsg) {
1693 return -EINVAL; 1751 return -EINVAL;
1752 }
1694 1753
1695 gr_ctx = &tsg->gr_ctx; 1754 gr_ctx = &tsg->gr_ctx;
1696 pm_ctx = &gr_ctx->pm_ctx; 1755 pm_ctx = &gr_ctx->pm_ctx;
@@ -1800,10 +1859,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1800 1859
1801 nvgpu_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data); 1860 nvgpu_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data);
1802 1861
1803 if (ctxheader->gpu_va) 1862 if (ctxheader->gpu_va) {
1804 g->ops.gr.write_pm_ptr(g, ctxheader, virt_addr); 1863 g->ops.gr.write_pm_ptr(g, ctxheader, virt_addr);
1805 else 1864 } else {
1806 g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr); 1865 g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr);
1866 }
1807 1867
1808 /* enable channel */ 1868 /* enable channel */
1809 gk20a_enable_channel_tsg(g, c); 1869 gk20a_enable_channel_tsg(g, c);
@@ -1837,13 +1897,15 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1837 nvgpu_log_fn(g, " "); 1897 nvgpu_log_fn(g, " ");
1838 1898
1839 tsg = tsg_gk20a_from_ch(c); 1899 tsg = tsg_gk20a_from_ch(c);
1840 if (!tsg) 1900 if (!tsg) {
1841 return -EINVAL; 1901 return -EINVAL;
1902 }
1842 1903
1843 gr_ctx = &tsg->gr_ctx; 1904 gr_ctx = &tsg->gr_ctx;
1844 mem = &gr_ctx->mem; 1905 mem = &gr_ctx->mem;
1845 if (gr->ctx_vars.local_golden_image == NULL) 1906 if (gr->ctx_vars.local_golden_image == NULL) {
1846 return -EINVAL; 1907 return -EINVAL;
1908 }
1847 1909
1848 /* Channel gr_ctx buffer is gpu cacheable. 1910 /* Channel gr_ctx buffer is gpu cacheable.
1849 Flush and invalidate before cpu update. */ 1911 Flush and invalidate before cpu update. */
@@ -1853,11 +1915,13 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1853 gr->ctx_vars.local_golden_image, 1915 gr->ctx_vars.local_golden_image,
1854 gr->ctx_vars.golden_image_size); 1916 gr->ctx_vars.golden_image_size);
1855 1917
1856 if (g->ops.gr.init_ctxsw_hdr_data) 1918 if (g->ops.gr.init_ctxsw_hdr_data) {
1857 g->ops.gr.init_ctxsw_hdr_data(g, mem); 1919 g->ops.gr.init_ctxsw_hdr_data(g, mem);
1920 }
1858 1921
1859 if (g->ops.gr.enable_cde_in_fecs && c->cde) 1922 if (g->ops.gr.enable_cde_in_fecs && c->cde) {
1860 g->ops.gr.enable_cde_in_fecs(g, mem); 1923 g->ops.gr.enable_cde_in_fecs(g, mem);
1924 }
1861 1925
1862 /* set priv access map */ 1926 /* set priv access map */
1863 virt_addr_lo = 1927 virt_addr_lo =
@@ -1865,10 +1929,11 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1865 virt_addr_hi = 1929 virt_addr_hi =
1866 u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); 1930 u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
1867 1931
1868 if (g->allow_all) 1932 if (g->allow_all) {
1869 data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(); 1933 data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f();
1870 else 1934 } else {
1871 data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(); 1935 data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f();
1936 }
1872 1937
1873 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(), 1938 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(),
1874 data); 1939 data);
@@ -1886,11 +1951,13 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1886 v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); 1951 v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
1887 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v); 1952 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v);
1888 1953
1889 if (g->ops.gr.update_ctxsw_preemption_mode) 1954 if (g->ops.gr.update_ctxsw_preemption_mode) {
1890 g->ops.gr.update_ctxsw_preemption_mode(g, c, mem); 1955 g->ops.gr.update_ctxsw_preemption_mode(g, c, mem);
1956 }
1891 1957
1892 if (g->ops.gr.update_boosted_ctx) 1958 if (g->ops.gr.update_boosted_ctx) {
1893 g->ops.gr.update_boosted_ctx(g, mem, gr_ctx); 1959 g->ops.gr.update_boosted_ctx(g, mem, gr_ctx);
1960 }
1894 1961
1895 virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va); 1962 virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
1896 virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va); 1963 virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
@@ -1919,8 +1986,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1919 } 1986 }
1920 1987
1921 virt_addr = gr_ctx->pm_ctx.mem.gpu_va; 1988 virt_addr = gr_ctx->pm_ctx.mem.gpu_va;
1922 } else 1989 } else {
1923 virt_addr = 0; 1990 virt_addr = 0;
1991 }
1924 1992
1925 data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o()); 1993 data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
1926 data = data & ~ctxsw_prog_main_image_pm_mode_m(); 1994 data = data & ~ctxsw_prog_main_image_pm_mode_m();
@@ -1957,8 +2025,9 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
1957 int err; 2025 int err;
1958 2026
1959 err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc); 2027 err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc);
1960 if (err) 2028 if (err) {
1961 return err; 2029 return err;
2030 }
1962 2031
1963 g->ops.mm.init_inst_block(&ucode_info->inst_blk_desc, vm, 0); 2032 g->ops.mm.init_inst_block(&ucode_info->inst_blk_desc, vm, 0);
1964 2033
@@ -2070,8 +2139,9 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2070 g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32)); 2139 g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32));
2071 2140
2072 err = nvgpu_dma_alloc_sys(g, ucode_size, &ucode_info->surface_desc); 2141 err = nvgpu_dma_alloc_sys(g, ucode_size, &ucode_info->surface_desc);
2073 if (err) 2142 if (err) {
2074 goto clean_up; 2143 goto clean_up;
2144 }
2075 2145
2076 gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc, 2146 gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
2077 &ucode_info->fecs, 2147 &ucode_info->fecs,
@@ -2092,15 +2162,17 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2092 gpccs_fw = NULL; 2162 gpccs_fw = NULL;
2093 2163
2094 err = gr_gk20a_init_ctxsw_ucode_vaspace(g); 2164 err = gr_gk20a_init_ctxsw_ucode_vaspace(g);
2095 if (err) 2165 if (err) {
2096 goto clean_up; 2166 goto clean_up;
2167 }
2097 2168
2098 return 0; 2169 return 0;
2099 2170
2100clean_up: 2171clean_up:
2101 if (ucode_info->surface_desc.gpu_va) 2172 if (ucode_info->surface_desc.gpu_va) {
2102 nvgpu_gmmu_unmap(vm, &ucode_info->surface_desc, 2173 nvgpu_gmmu_unmap(vm, &ucode_info->surface_desc,
2103 ucode_info->surface_desc.gpu_va); 2174 ucode_info->surface_desc.gpu_va);
2175 }
2104 nvgpu_dma_free(g, &ucode_info->surface_desc); 2176 nvgpu_dma_free(g, &ucode_info->surface_desc);
2105 2177
2106 nvgpu_release_firmware(g, gpccs_fw); 2178 nvgpu_release_firmware(g, gpccs_fw);
@@ -2123,9 +2195,10 @@ static void gr_gk20a_wait_for_fecs_arb_idle(struct gk20a *g)
2123 val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()); 2195 val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
2124 } 2196 }
2125 2197
2126 if (!retries) 2198 if (!retries) {
2127 nvgpu_err(g, "arbiter cmd timeout, fecs arb ctx cmd: 0x%08x", 2199 nvgpu_err(g, "arbiter cmd timeout, fecs arb ctx cmd: 0x%08x",
2128 gk20a_readl(g, gr_fecs_arb_ctx_cmd_r())); 2200 gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()));
2201 }
2129 2202
2130 retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT; 2203 retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT;
2131 while ((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) & 2204 while ((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) &
@@ -2358,8 +2431,9 @@ int gr_gk20a_load_ctxsw_ucode(struct gk20a *g)
2358 if (!g->gr.skip_ucode_init) { 2431 if (!g->gr.skip_ucode_init) {
2359 err = gr_gk20a_init_ctxsw_ucode(g); 2432 err = gr_gk20a_init_ctxsw_ucode(g);
2360 2433
2361 if (err) 2434 if (err) {
2362 return err; 2435 return err;
2436 }
2363 } 2437 }
2364 gr_gk20a_load_falcon_with_bootloader(g); 2438 gr_gk20a_load_falcon_with_bootloader(g);
2365 g->gr.skip_ucode_init = true; 2439 g->gr.skip_ucode_init = true;
@@ -2384,9 +2458,10 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g)
2384 } 2458 }
2385 2459
2386 if (nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP) || 2460 if (nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP) ||
2387 nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) 2461 nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
2388 gk20a_writel(g, gr_fecs_current_ctx_r(), 2462 gk20a_writel(g, gr_fecs_current_ctx_r(),
2389 gr_fecs_current_ctx_valid_false_f()); 2463 gr_fecs_current_ctx_valid_false_f());
2464 }
2390 2465
2391 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff); 2466 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff);
2392 gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff); 2467 gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff);
@@ -2452,8 +2527,9 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
2452void gk20a_gr_destroy_ctx_buffer(struct gk20a *g, 2527void gk20a_gr_destroy_ctx_buffer(struct gk20a *g,
2453 struct gr_ctx_buffer_desc *desc) 2528 struct gr_ctx_buffer_desc *desc)
2454{ 2529{
2455 if (!desc) 2530 if (!desc) {
2456 return; 2531 return;
2532 }
2457 nvgpu_dma_free(g, &desc->mem); 2533 nvgpu_dma_free(g, &desc->mem);
2458 desc->destroy = NULL; 2534 desc->destroy = NULL;
2459} 2535}
@@ -2466,12 +2542,14 @@ int gk20a_gr_alloc_ctx_buffer(struct gk20a *g,
2466 2542
2467 nvgpu_log_fn(g, " "); 2543 nvgpu_log_fn(g, " ");
2468 2544
2469 if (nvgpu_mem_is_valid(&desc->mem)) 2545 if (nvgpu_mem_is_valid(&desc->mem)) {
2470 return 0; 2546 return 0;
2547 }
2471 2548
2472 err = nvgpu_dma_alloc_sys(g, size, &desc->mem); 2549 err = nvgpu_dma_alloc_sys(g, size, &desc->mem);
2473 if (err) 2550 if (err) {
2474 return err; 2551 return err;
2552 }
2475 2553
2476 desc->destroy = gk20a_gr_destroy_ctx_buffer; 2554 desc->destroy = gk20a_gr_destroy_ctx_buffer;
2477 2555
@@ -2513,45 +2591,51 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
2513 2591
2514 err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[CIRCULAR], 2592 err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[CIRCULAR],
2515 cb_buffer_size); 2593 cb_buffer_size);
2516 if (err) 2594 if (err) {
2517 goto clean_up; 2595 goto clean_up;
2596 }
2518 2597
2519 if (g->ops.secure_alloc) { 2598 if (g->ops.secure_alloc) {
2520 err = g->ops.secure_alloc(g, 2599 err = g->ops.secure_alloc(g,
2521 &gr->global_ctx_buffer[CIRCULAR_VPR], 2600 &gr->global_ctx_buffer[CIRCULAR_VPR],
2522 cb_buffer_size); 2601 cb_buffer_size);
2523 if (err) 2602 if (err) {
2524 goto clean_up; 2603 goto clean_up;
2604 }
2525 } 2605 }
2526 2606
2527 nvgpu_log_info(g, "pagepool_buffer_size : %d", pagepool_buffer_size); 2607 nvgpu_log_info(g, "pagepool_buffer_size : %d", pagepool_buffer_size);
2528 2608
2529 err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[PAGEPOOL], 2609 err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[PAGEPOOL],
2530 pagepool_buffer_size); 2610 pagepool_buffer_size);
2531 if (err) 2611 if (err) {
2532 goto clean_up; 2612 goto clean_up;
2613 }
2533 2614
2534 if (g->ops.secure_alloc) { 2615 if (g->ops.secure_alloc) {
2535 err = g->ops.secure_alloc(g, 2616 err = g->ops.secure_alloc(g,
2536 &gr->global_ctx_buffer[PAGEPOOL_VPR], 2617 &gr->global_ctx_buffer[PAGEPOOL_VPR],
2537 pagepool_buffer_size); 2618 pagepool_buffer_size);
2538 if (err) 2619 if (err) {
2539 goto clean_up; 2620 goto clean_up;
2621 }
2540 } 2622 }
2541 2623
2542 nvgpu_log_info(g, "attr_buffer_size : %d", attr_buffer_size); 2624 nvgpu_log_info(g, "attr_buffer_size : %d", attr_buffer_size);
2543 2625
2544 err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[ATTRIBUTE], 2626 err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[ATTRIBUTE],
2545 attr_buffer_size); 2627 attr_buffer_size);
2546 if (err) 2628 if (err) {
2547 goto clean_up; 2629 goto clean_up;
2630 }
2548 2631
2549 if (g->ops.secure_alloc) { 2632 if (g->ops.secure_alloc) {
2550 err = g->ops.secure_alloc(g, 2633 err = g->ops.secure_alloc(g,
2551 &gr->global_ctx_buffer[ATTRIBUTE_VPR], 2634 &gr->global_ctx_buffer[ATTRIBUTE_VPR],
2552 attr_buffer_size); 2635 attr_buffer_size);
2553 if (err) 2636 if (err) {
2554 goto clean_up; 2637 goto clean_up;
2638 }
2555 } 2639 }
2556 2640
2557 nvgpu_log_info(g, "golden_image_size : %d", 2641 nvgpu_log_info(g, "golden_image_size : %d",
@@ -2560,8 +2644,9 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
2560 err = gk20a_gr_alloc_ctx_buffer(g, 2644 err = gk20a_gr_alloc_ctx_buffer(g,
2561 &gr->global_ctx_buffer[GOLDEN_CTX], 2645 &gr->global_ctx_buffer[GOLDEN_CTX],
2562 gr->ctx_vars.golden_image_size); 2646 gr->ctx_vars.golden_image_size);
2563 if (err) 2647 if (err) {
2564 goto clean_up; 2648 goto clean_up;
2649 }
2565 2650
2566 nvgpu_log_info(g, "priv_access_map_size : %d", 2651 nvgpu_log_info(g, "priv_access_map_size : %d",
2567 gr->ctx_vars.priv_access_map_size); 2652 gr->ctx_vars.priv_access_map_size);
@@ -2570,8 +2655,9 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
2570 &gr->global_ctx_buffer[PRIV_ACCESS_MAP], 2655 &gr->global_ctx_buffer[PRIV_ACCESS_MAP],
2571 gr->ctx_vars.priv_access_map_size); 2656 gr->ctx_vars.priv_access_map_size);
2572 2657
2573 if (err) 2658 if (err) {
2574 goto clean_up; 2659 goto clean_up;
2660 }
2575 2661
2576#ifdef CONFIG_GK20A_CTXSW_TRACE 2662#ifdef CONFIG_GK20A_CTXSW_TRACE
2577 nvgpu_log_info(g, "fecs_trace_buffer_size : %d", 2663 nvgpu_log_info(g, "fecs_trace_buffer_size : %d",
@@ -2644,8 +2730,9 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2644 nvgpu_log_fn(g, " "); 2730 nvgpu_log_fn(g, " ");
2645 2731
2646 tsg = tsg_gk20a_from_ch(c); 2732 tsg = tsg_gk20a_from_ch(c);
2647 if (!tsg) 2733 if (!tsg) {
2648 return -EINVAL; 2734 return -EINVAL;
2735 }
2649 2736
2650 g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va; 2737 g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
2651 g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size; 2738 g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
@@ -2664,8 +2751,9 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2664 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 2751 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
2665 NVGPU_VM_MAP_CACHEABLE, 2752 NVGPU_VM_MAP_CACHEABLE,
2666 gk20a_mem_flag_none, true, mem->aperture); 2753 gk20a_mem_flag_none, true, mem->aperture);
2667 if (!gpu_va) 2754 if (!gpu_va) {
2668 goto clean_up; 2755 goto clean_up;
2756 }
2669 g_bfr_va[CIRCULAR_VA] = gpu_va; 2757 g_bfr_va[CIRCULAR_VA] = gpu_va;
2670 g_bfr_size[CIRCULAR_VA] = mem->size; 2758 g_bfr_size[CIRCULAR_VA] = mem->size;
2671 2759
@@ -2682,8 +2770,9 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2682 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 2770 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
2683 NVGPU_VM_MAP_CACHEABLE, 2771 NVGPU_VM_MAP_CACHEABLE,
2684 gk20a_mem_flag_none, false, mem->aperture); 2772 gk20a_mem_flag_none, false, mem->aperture);
2685 if (!gpu_va) 2773 if (!gpu_va) {
2686 goto clean_up; 2774 goto clean_up;
2775 }
2687 g_bfr_va[ATTRIBUTE_VA] = gpu_va; 2776 g_bfr_va[ATTRIBUTE_VA] = gpu_va;
2688 g_bfr_size[ATTRIBUTE_VA] = mem->size; 2777 g_bfr_size[ATTRIBUTE_VA] = mem->size;
2689 2778
@@ -2700,8 +2789,9 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2700 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 2789 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
2701 NVGPU_VM_MAP_CACHEABLE, 2790 NVGPU_VM_MAP_CACHEABLE,
2702 gk20a_mem_flag_none, true, mem->aperture); 2791 gk20a_mem_flag_none, true, mem->aperture);
2703 if (!gpu_va) 2792 if (!gpu_va) {
2704 goto clean_up; 2793 goto clean_up;
2794 }
2705 g_bfr_va[PAGEPOOL_VA] = gpu_va; 2795 g_bfr_va[PAGEPOOL_VA] = gpu_va;
2706 g_bfr_size[PAGEPOOL_VA] = mem->size; 2796 g_bfr_size[PAGEPOOL_VA] = mem->size;
2707 2797
@@ -2709,8 +2799,9 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2709 mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; 2799 mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
2710 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0, 2800 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
2711 gk20a_mem_flag_none, true, mem->aperture); 2801 gk20a_mem_flag_none, true, mem->aperture);
2712 if (!gpu_va) 2802 if (!gpu_va) {
2713 goto clean_up; 2803 goto clean_up;
2804 }
2714 g_bfr_va[GOLDEN_CTX_VA] = gpu_va; 2805 g_bfr_va[GOLDEN_CTX_VA] = gpu_va;
2715 g_bfr_size[GOLDEN_CTX_VA] = mem->size; 2806 g_bfr_size[GOLDEN_CTX_VA] = mem->size;
2716 g_bfr_index[GOLDEN_CTX_VA] = GOLDEN_CTX; 2807 g_bfr_index[GOLDEN_CTX_VA] = GOLDEN_CTX;
@@ -2719,8 +2810,9 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2719 mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem; 2810 mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem;
2720 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0, 2811 gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
2721 gk20a_mem_flag_none, true, mem->aperture); 2812 gk20a_mem_flag_none, true, mem->aperture);
2722 if (!gpu_va) 2813 if (!gpu_va) {
2723 goto clean_up; 2814 goto clean_up;
2815 }
2724 g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va; 2816 g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
2725 g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size; 2817 g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size;
2726 g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; 2818 g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP;
@@ -2759,16 +2851,18 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
2759 2851
2760 nvgpu_log_fn(g, " "); 2852 nvgpu_log_fn(g, " ");
2761 2853
2762 if (gr->ctx_vars.buffer_size == 0) 2854 if (gr->ctx_vars.buffer_size == 0) {
2763 return 0; 2855 return 0;
2856 }
2764 2857
2765 /* alloc channel gr ctx buffer */ 2858 /* alloc channel gr ctx buffer */
2766 gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; 2859 gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
2767 gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; 2860 gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
2768 2861
2769 err = nvgpu_dma_alloc(g, gr->ctx_vars.buffer_total_size, &gr_ctx->mem); 2862 err = nvgpu_dma_alloc(g, gr->ctx_vars.buffer_total_size, &gr_ctx->mem);
2770 if (err) 2863 if (err) {
2771 return err; 2864 return err;
2865 }
2772 2866
2773 gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm, 2867 gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm,
2774 &gr_ctx->mem, 2868 &gr_ctx->mem,
@@ -2776,8 +2870,9 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
2776 0, /* not GPU-cacheable */ 2870 0, /* not GPU-cacheable */
2777 gk20a_mem_flag_none, true, 2871 gk20a_mem_flag_none, true,
2778 gr_ctx->mem.aperture); 2872 gr_ctx->mem.aperture);
2779 if (!gr_ctx->mem.gpu_va) 2873 if (!gr_ctx->mem.gpu_va) {
2780 goto err_free_mem; 2874 goto err_free_mem;
2875 }
2781 2876
2782 return 0; 2877 return 0;
2783 2878
@@ -2799,8 +2894,9 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
2799 } 2894 }
2800 2895
2801 err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, tsg->vm, class, padding); 2896 err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, tsg->vm, class, padding);
2802 if (err) 2897 if (err) {
2803 return err; 2898 return err;
2899 }
2804 2900
2805 gr_ctx->tsgid = tsg->tsgid; 2901 gr_ctx->tsgid = tsg->tsgid;
2806 2902
@@ -2818,8 +2914,9 @@ void gr_gk20a_free_gr_ctx(struct gk20a *g,
2818 gr_gk20a_free_channel_pm_ctx(g, vm, gr_ctx); 2914 gr_gk20a_free_channel_pm_ctx(g, vm, gr_ctx);
2819 2915
2820 if (g->ops.gr.dump_ctxsw_stats && 2916 if (g->ops.gr.dump_ctxsw_stats &&
2821 g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) 2917 g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) {
2822 g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx); 2918 g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx);
2919 }
2823 2920
2824 nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); 2921 nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
2825 nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); 2922 nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
@@ -2859,8 +2956,9 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
2859 nvgpu_log_fn(g, " "); 2956 nvgpu_log_fn(g, " ");
2860 2957
2861 tsg = tsg_gk20a_from_ch(c); 2958 tsg = tsg_gk20a_from_ch(c);
2862 if (!tsg) 2959 if (!tsg) {
2863 return -EINVAL; 2960 return -EINVAL;
2961 }
2864 2962
2865 patch_ctx = &tsg->gr_ctx.patch_ctx; 2963 patch_ctx = &tsg->gr_ctx.patch_ctx;
2866 alloc_size = g->ops.gr.get_patch_slots(g) * 2964 alloc_size = g->ops.gr.get_patch_slots(g) *
@@ -2871,8 +2969,9 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
2871 2969
2872 err = nvgpu_dma_alloc_map_sys(ch_vm, 2970 err = nvgpu_dma_alloc_map_sys(ch_vm,
2873 alloc_size * sizeof(u32), &patch_ctx->mem); 2971 alloc_size * sizeof(u32), &patch_ctx->mem);
2874 if (err) 2972 if (err) {
2875 return err; 2973 return err;
2974 }
2876 2975
2877 nvgpu_log_fn(g, "done"); 2976 nvgpu_log_fn(g, "done");
2878 return 0; 2977 return 0;
@@ -2886,9 +2985,10 @@ static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g,
2886 2985
2887 nvgpu_log_fn(g, " "); 2986 nvgpu_log_fn(g, " ");
2888 2987
2889 if (patch_ctx->mem.gpu_va) 2988 if (patch_ctx->mem.gpu_va) {
2890 nvgpu_gmmu_unmap(vm, &patch_ctx->mem, 2989 nvgpu_gmmu_unmap(vm, &patch_ctx->mem,
2891 patch_ctx->mem.gpu_va); 2990 patch_ctx->mem.gpu_va);
2991 }
2892 2992
2893 nvgpu_dma_free(g, &patch_ctx->mem); 2993 nvgpu_dma_free(g, &patch_ctx->mem);
2894 patch_ctx->data_count = 0; 2994 patch_ctx->data_count = 0;
@@ -2935,8 +3035,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
2935 } 3035 }
2936 c->obj_class = class_num; 3036 c->obj_class = class_num;
2937 3037
2938 if (!gk20a_is_channel_marked_as_tsg(c)) 3038 if (!gk20a_is_channel_marked_as_tsg(c)) {
2939 return -EINVAL; 3039 return -EINVAL;
3040 }
2940 3041
2941 tsg = &f->tsg[c->tsgid]; 3042 tsg = &f->tsg[c->tsgid];
2942 gr_ctx = &tsg->gr_ctx; 3043 gr_ctx = &tsg->gr_ctx;
@@ -3007,8 +3108,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
3007 } 3108 }
3008#endif 3109#endif
3009 3110
3010 if (g->ops.gr.set_czf_bypass) 3111 if (g->ops.gr.set_czf_bypass) {
3011 g->ops.gr.set_czf_bypass(g, c); 3112 g->ops.gr.set_czf_bypass(g, c);
3113 }
3012 3114
3013 /* PM ctxt switch is off by default */ 3115 /* PM ctxt switch is off by default */
3014 gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); 3116 gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
@@ -3112,8 +3214,9 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3112 nvgpu_vfree(g, gr->ctx_vars.local_golden_image); 3214 nvgpu_vfree(g, gr->ctx_vars.local_golden_image);
3113 gr->ctx_vars.local_golden_image = NULL; 3215 gr->ctx_vars.local_golden_image = NULL;
3114 3216
3115 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) 3217 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) {
3116 nvgpu_big_free(g, gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); 3218 nvgpu_big_free(g, gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
3219 }
3117 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; 3220 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
3118 3221
3119 gk20a_comptag_allocator_destroy(g, &gr->comp_tags); 3222 gk20a_comptag_allocator_destroy(g, &gr->comp_tags);
@@ -3146,8 +3249,9 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
3146 if (gr->fbp_rop_l2_en_mask == NULL) { 3249 if (gr->fbp_rop_l2_en_mask == NULL) {
3147 gr->fbp_rop_l2_en_mask = 3250 gr->fbp_rop_l2_en_mask =
3148 nvgpu_kzalloc(g, gr->max_fbps_count * sizeof(u32)); 3251 nvgpu_kzalloc(g, gr->max_fbps_count * sizeof(u32));
3149 if (!gr->fbp_rop_l2_en_mask) 3252 if (!gr->fbp_rop_l2_en_mask) {
3150 goto clean_up; 3253 goto clean_up;
3254 }
3151 } else { 3255 } else {
3152 memset(gr->fbp_rop_l2_en_mask, 0, gr->max_fbps_count * 3256 memset(gr->fbp_rop_l2_en_mask, 0, gr->max_fbps_count *
3153 sizeof(u32)); 3257 sizeof(u32));
@@ -3166,8 +3270,9 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
3166 3270
3167 gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); 3271 gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
3168 if (WARN(gr->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC, 3272 if (WARN(gr->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC,
3169 "too many pes per gpc\n")) 3273 "too many pes per gpc\n")) {
3170 goto clean_up; 3274 goto clean_up;
3275 }
3171 3276
3172 gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS); 3277 gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS);
3173 3278
@@ -3176,45 +3281,51 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
3176 goto clean_up; 3281 goto clean_up;
3177 } 3282 }
3178 3283
3179 if (gr->gpc_tpc_count == NULL) 3284 if (gr->gpc_tpc_count == NULL) {
3180 gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count * 3285 gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count *
3181 sizeof(u32)); 3286 sizeof(u32));
3182 else 3287 } else {
3183 memset(gr->gpc_tpc_count, 0, gr->gpc_count * 3288 memset(gr->gpc_tpc_count, 0, gr->gpc_count *
3184 sizeof(u32)); 3289 sizeof(u32));
3290 }
3185 3291
3186 if (gr->gpc_tpc_mask == NULL) 3292 if (gr->gpc_tpc_mask == NULL) {
3187 gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->max_gpc_count * 3293 gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->max_gpc_count *
3188 sizeof(u32)); 3294 sizeof(u32));
3189 else 3295 } else {
3190 memset(gr->gpc_tpc_mask, 0, gr->max_gpc_count * 3296 memset(gr->gpc_tpc_mask, 0, gr->max_gpc_count *
3191 sizeof(u32)); 3297 sizeof(u32));
3298 }
3192 3299
3193 if (gr->gpc_zcb_count == NULL) 3300 if (gr->gpc_zcb_count == NULL) {
3194 gr->gpc_zcb_count = nvgpu_kzalloc(g, gr->gpc_count * 3301 gr->gpc_zcb_count = nvgpu_kzalloc(g, gr->gpc_count *
3195 sizeof(u32)); 3302 sizeof(u32));
3196 else 3303 } else {
3197 memset(gr->gpc_zcb_count, 0, gr->gpc_count * 3304 memset(gr->gpc_zcb_count, 0, gr->gpc_count *
3198 sizeof(u32)); 3305 sizeof(u32));
3306 }
3199 3307
3200 if (gr->gpc_ppc_count == NULL) 3308 if (gr->gpc_ppc_count == NULL) {
3201 gr->gpc_ppc_count = nvgpu_kzalloc(g, gr->gpc_count * 3309 gr->gpc_ppc_count = nvgpu_kzalloc(g, gr->gpc_count *
3202 sizeof(u32)); 3310 sizeof(u32));
3203 else 3311 } else {
3204 memset(gr->gpc_ppc_count, 0, gr->gpc_count * 3312 memset(gr->gpc_ppc_count, 0, gr->gpc_count *
3205 sizeof(u32)); 3313 sizeof(u32));
3314 }
3206 3315
3207 if (gr->gpc_skip_mask == NULL) 3316 if (gr->gpc_skip_mask == NULL) {
3208 gr->gpc_skip_mask = 3317 gr->gpc_skip_mask =
3209 nvgpu_kzalloc(g, gr_pd_dist_skip_table__size_1_v() * 3318 nvgpu_kzalloc(g, gr_pd_dist_skip_table__size_1_v() *
3210 4 * sizeof(u32)); 3319 4 * sizeof(u32));
3211 else 3320 } else {
3212 memset(gr->gpc_skip_mask, 0, gr_pd_dist_skip_table__size_1_v() * 3321 memset(gr->gpc_skip_mask, 0, gr_pd_dist_skip_table__size_1_v() *
3213 4 * sizeof(u32)); 3322 4 * sizeof(u32));
3323 }
3214 3324
3215 if (!gr->gpc_tpc_count || !gr->gpc_tpc_mask || !gr->gpc_zcb_count || 3325 if (!gr->gpc_tpc_count || !gr->gpc_tpc_mask || !gr->gpc_zcb_count ||
3216 !gr->gpc_ppc_count || !gr->gpc_skip_mask) 3326 !gr->gpc_ppc_count || !gr->gpc_skip_mask) {
3217 goto clean_up; 3327 goto clean_up;
3328 }
3218 3329
3219 for (gpc_index = 0; gpc_index < gr->max_gpc_count; gpc_index++) { 3330 for (gpc_index = 0; gpc_index < gr->max_gpc_count; gpc_index++) {
3220 if (g->ops.gr.get_gpc_tpc_mask) { 3331 if (g->ops.gr.get_gpc_tpc_mask) {
@@ -3247,8 +3358,9 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
3247 nvgpu_kzalloc(g, gr->gpc_count * 3358 nvgpu_kzalloc(g, gr->gpc_count *
3248 sizeof(u32)); 3359 sizeof(u32));
3249 if (!gr->pes_tpc_count[pes_index] || 3360 if (!gr->pes_tpc_count[pes_index] ||
3250 !gr->pes_tpc_mask[pes_index]) 3361 !gr->pes_tpc_mask[pes_index]) {
3251 goto clean_up; 3362 goto clean_up;
3363 }
3252 } 3364 }
3253 3365
3254 tmp = gk20a_readl(g, 3366 tmp = gk20a_readl(g,
@@ -3261,8 +3373,9 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
3261 /* detect PES presence by seeing if there are 3373 /* detect PES presence by seeing if there are
3262 * TPCs connected to it. 3374 * TPCs connected to it.
3263 */ 3375 */
3264 if (pes_tpc_count != 0) 3376 if (pes_tpc_count != 0) {
3265 gr->gpc_ppc_count[gpc_index]++; 3377 gr->gpc_ppc_count[gpc_index]++;
3378 }
3266 3379
3267 gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count; 3380 gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
3268 gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask; 3381 gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
@@ -3301,14 +3414,15 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
3301 } 3414 }
3302 3415
3303 /* allocate for max tpc per gpc */ 3416 /* allocate for max tpc per gpc */
3304 if (gr->sm_to_cluster == NULL) 3417 if (gr->sm_to_cluster == NULL) {
3305 gr->sm_to_cluster = nvgpu_kzalloc(g, gr->gpc_count * 3418 gr->sm_to_cluster = nvgpu_kzalloc(g, gr->gpc_count *
3306 gr->max_tpc_per_gpc_count * 3419 gr->max_tpc_per_gpc_count *
3307 sm_per_tpc * sizeof(struct sm_info)); 3420 sm_per_tpc * sizeof(struct sm_info));
3308 else 3421 } else {
3309 memset(gr->sm_to_cluster, 0, gr->gpc_count * 3422 memset(gr->sm_to_cluster, 0, gr->gpc_count *
3310 gr->max_tpc_per_gpc_count * 3423 gr->max_tpc_per_gpc_count *
3311 sm_per_tpc * sizeof(struct sm_info)); 3424 sm_per_tpc * sizeof(struct sm_info));
3425 }
3312 gr->no_of_sm = 0; 3426 gr->no_of_sm = 0;
3313 3427
3314 nvgpu_log_info(g, "fbps: %d", gr->num_fbps); 3428 nvgpu_log_info(g, "fbps: %d", gr->num_fbps);
@@ -3387,14 +3501,16 @@ static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr)
3387 3501
3388 if (!nvgpu_mem_is_valid(&gr->mmu_wr_mem)) { 3502 if (!nvgpu_mem_is_valid(&gr->mmu_wr_mem)) {
3389 err = nvgpu_dma_alloc_sys(g, 0x1000, &gr->mmu_wr_mem); 3503 err = nvgpu_dma_alloc_sys(g, 0x1000, &gr->mmu_wr_mem);
3390 if (err) 3504 if (err) {
3391 goto err; 3505 goto err;
3506 }
3392 } 3507 }
3393 3508
3394 if (!nvgpu_mem_is_valid(&gr->mmu_rd_mem)) { 3509 if (!nvgpu_mem_is_valid(&gr->mmu_rd_mem)) {
3395 err = nvgpu_dma_alloc_sys(g, 0x1000, &gr->mmu_rd_mem); 3510 err = nvgpu_dma_alloc_sys(g, 0x1000, &gr->mmu_rd_mem);
3396 if (err) 3511 if (err) {
3397 goto err_free_wr_mem; 3512 goto err_free_wr_mem;
3513 }
3398 } 3514 }
3399 return 0; 3515 return 0;
3400 3516
@@ -3446,11 +3562,11 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
3446 3562
3447 gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET; 3563 gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET;
3448 3564
3449 if (gr->tpc_count == 3) 3565 if (gr->tpc_count == 3) {
3450 gr->map_row_offset = 2; 3566 gr->map_row_offset = 2;
3451 else if (gr->tpc_count < 3) 3567 } else if (gr->tpc_count < 3) {
3452 gr->map_row_offset = 1; 3568 gr->map_row_offset = 1;
3453 else { 3569 } else {
3454 gr->map_row_offset = 3; 3570 gr->map_row_offset = 3;
3455 3571
3456 for (index = 1; index < 18; index++) { 3572 for (index = 1; index < 18; index++) {
@@ -3487,13 +3603,15 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
3487 } 3603 }
3488 3604
3489 if (gr->map_tiles) { 3605 if (gr->map_tiles) {
3490 if (gr->map_tile_count != gr->tpc_count) 3606 if (gr->map_tile_count != gr->tpc_count) {
3491 delete_map = true; 3607 delete_map = true;
3608 }
3492 3609
3493 for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) { 3610 for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) {
3494 if (gr_gk20a_get_map_tile_count(gr, tile_count) 3611 if (gr_gk20a_get_map_tile_count(gr, tile_count)
3495 >= gr->tpc_count) 3612 >= gr->tpc_count) {
3496 delete_map = true; 3613 delete_map = true;
3614 }
3497 } 3615 }
3498 3616
3499 if (delete_map) { 3617 if (delete_map) {
@@ -3540,10 +3658,11 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
3540 } 3658 }
3541 3659
3542 mul_factor = gr->gpc_count * max_tpc_count; 3660 mul_factor = gr->gpc_count * max_tpc_count;
3543 if (mul_factor & 0x1) 3661 if (mul_factor & 0x1) {
3544 mul_factor = 2; 3662 mul_factor = 2;
3545 else 3663 } else {
3546 mul_factor = 1; 3664 mul_factor = 1;
3665 }
3547 3666
3548 comm_denom = gr->gpc_count * max_tpc_count * mul_factor; 3667 comm_denom = gr->gpc_count * max_tpc_count * mul_factor;
3549 3668
@@ -3552,10 +3671,11 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
3552 3671
3553 init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor; 3672 init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor;
3554 3673
3555 if (num_tpc != 0) 3674 if (num_tpc != 0) {
3556 init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2; 3675 init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2;
3557 else 3676 } else {
3558 init_err[gpc_index] = 0; 3677 init_err[gpc_index] = 0;
3678 }
3559 3679
3560 run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index]; 3680 run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
3561 } 3681 }
@@ -3565,8 +3685,9 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
3565 if ((run_err[gpc_index] * 2) >= comm_denom) { 3685 if ((run_err[gpc_index] * 2) >= comm_denom) {
3566 gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index]; 3686 gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
3567 run_err[gpc_index] += init_frac[gpc_index] - comm_denom; 3687 run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
3568 } else 3688 } else {
3569 run_err[gpc_index] += init_frac[gpc_index]; 3689 run_err[gpc_index] += init_frac[gpc_index];
3690 }
3570 } 3691 }
3571 } 3692 }
3572 } 3693 }
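The gr_gk20a_init_map_tiles() hunks above distribute map-tile slots across GPCs in proportion to each GPC's TPC count, using integer error accumulation (init_frac/init_err/run_err against comm_denom). The surrounding loop structure is not fully visible in these hunks, so the following is only a rough, self-contained model of the idea, not a copy of the driver code; all names are hypothetical and the sorted-to-unsorted GPC remap is omitted:

/* Spread `total` output slots across `gpc_count` buckets in proportion to
 * tpc_per_gpc[], emitting a bucket index each time its accumulated error
 * crosses the common denominator. Assumes gpc_count <= 32 and at least one
 * bucket with a nonzero weight.
 */
static void spread_tiles(const unsigned int *tpc_per_gpc,
                         unsigned int gpc_count, unsigned int max_tpc,
                         unsigned char *out, unsigned int total)
{
        int init_frac[32], run_err[32];
        unsigned int gpc, mark = 0;
        unsigned int mul = ((gpc_count * max_tpc) & 0x1U) ? 2U : 1U;
        unsigned int denom = gpc_count * max_tpc * mul;

        for (gpc = 0; gpc < gpc_count; gpc++) {
                init_frac[gpc] = (int)(tpc_per_gpc[gpc] * gpc_count * mul);
                run_err[gpc] = init_frac[gpc] +
                        ((tpc_per_gpc[gpc] != 0U) ?
                         (int)(gpc * max_tpc * mul) - (int)(denom / 2U) : 0);
        }

        while (mark < total) {
                for (gpc = 0; gpc < gpc_count && mark < total; gpc++) {
                        if ((run_err[gpc] * 2) >= (int)denom) {
                                out[mark++] = (unsigned char)gpc;
                                run_err[gpc] += init_frac[gpc] - (int)denom;
                        } else {
                                run_err[gpc] += init_frac[gpc];
                        }
                }
        }
}

Working in scaled integers (the mul_factor/comm_denom pair) keeps the proportional split exact without any floating-point math.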
@@ -3578,10 +3699,11 @@ clean_up:
3578 nvgpu_kfree(g, sorted_num_tpcs); 3699 nvgpu_kfree(g, sorted_num_tpcs);
3579 nvgpu_kfree(g, sorted_to_unsorted_gpc_map); 3700 nvgpu_kfree(g, sorted_to_unsorted_gpc_map);
3580 3701
3581 if (ret) 3702 if (ret) {
3582 nvgpu_err(g, "fail"); 3703 nvgpu_err(g, "fail");
3583 else 3704 } else {
3584 nvgpu_log_fn(g, "done"); 3705 nvgpu_log_fn(g, "done");
3706 }
3585 3707
3586 return ret; 3708 return ret;
3587 } 3709 }
@@ -3624,8 +3746,9 @@ int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
3624 struct zcull_ctx_desc *zcull_ctx; 3746 struct zcull_ctx_desc *zcull_ctx;
3625 3747
3626 tsg = tsg_gk20a_from_ch(c); 3748 tsg = tsg_gk20a_from_ch(c);
3627 if (!tsg) 3749 if (!tsg) {
3628 return -EINVAL; 3750 return -EINVAL;
3751 }
3629 3752
3630 zcull_ctx = &tsg->gr_ctx.zcull_ctx; 3753 zcull_ctx = &tsg->gr_ctx.zcull_ctx;
3631 zcull_ctx->ctx_sw_mode = mode; 3754 zcull_ctx->ctx_sw_mode = mode;
@@ -3815,8 +3938,9 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
3815 ret = g->ops.gr.add_zbc_color(g, gr, 3938 ret = g->ops.gr.add_zbc_color(g, gr,
3816 zbc_val, gr->max_used_color_index); 3939 zbc_val, gr->max_used_color_index);
3817 3940
3818 if (!ret) 3941 if (!ret) {
3819 gr->max_used_color_index++; 3942 gr->max_used_color_index++;
3943 }
3820 } 3944 }
3821 break; 3945 break;
3822 case GK20A_ZBC_TYPE_DEPTH: 3946 case GK20A_ZBC_TYPE_DEPTH:
@@ -3845,8 +3969,9 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
3845 ret = g->ops.gr.add_zbc_depth(g, gr, 3969 ret = g->ops.gr.add_zbc_depth(g, gr,
3846 zbc_val, gr->max_used_depth_index); 3970 zbc_val, gr->max_used_depth_index);
3847 3971
3848 if (!ret) 3972 if (!ret) {
3849 gr->max_used_depth_index++; 3973 gr->max_used_depth_index++;
3974 }
3850 } 3975 }
3851 break; 3976 break;
3852 case T19X_ZBC: 3977 case T19X_ZBC:
@@ -3956,8 +4081,9 @@ static int gr_gk20a_load_zbc_table(struct gk20a *g, struct gr_gk20a *gr)
3956 4081
3957 ret = g->ops.gr.add_zbc_color(g, gr, &zbc_val, i); 4082 ret = g->ops.gr.add_zbc_color(g, gr, &zbc_val, i);
3958 4083
3959 if (ret) 4084 if (ret) {
3960 return ret; 4085 return ret;
4086 }
3961 } 4087 }
3962 for (i = 0; i < gr->max_used_depth_index; i++) { 4088 for (i = 0; i < gr->max_used_depth_index; i++) {
3963 struct zbc_depth_table *d_tbl = &gr->zbc_dep_tbl[i]; 4089 struct zbc_depth_table *d_tbl = &gr->zbc_dep_tbl[i];
@@ -3968,14 +4094,16 @@ static int gr_gk20a_load_zbc_table(struct gk20a *g, struct gr_gk20a *gr)
3968 zbc_val.format = d_tbl->format; 4094 zbc_val.format = d_tbl->format;
3969 4095
3970 ret = g->ops.gr.add_zbc_depth(g, gr, &zbc_val, i); 4096 ret = g->ops.gr.add_zbc_depth(g, gr, &zbc_val, i);
3971 if (ret) 4097 if (ret) {
3972 return ret; 4098 return ret;
4099 }
3973 } 4100 }
3974 4101
3975 if (g->ops.gr.load_zbc_s_tbl) { 4102 if (g->ops.gr.load_zbc_s_tbl) {
3976 ret = g->ops.gr.load_zbc_s_tbl(g, gr); 4103 ret = g->ops.gr.load_zbc_s_tbl(g, gr);
3977 if (ret) 4104 if (ret) {
3978 return ret; 4105 return ret;
4106 }
3979 } 4107 }
3980 4108
3981 return 0; 4109 return 0;
@@ -4131,13 +4259,14 @@ void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config)
4131 (engine_info->engine_enum == ENGINE_GR_GK20A)) { 4259 (engine_info->engine_enum == ENGINE_GR_GK20A)) {
4132 g->ops.therm.init_blcg_mode(g, mode_config, active_engine_id); 4260 g->ops.therm.init_blcg_mode(g, mode_config, active_engine_id);
4133 break; 4261 break;
4134 } else if (cgmode == ELCG_MODE) 4262 } else if (cgmode == ELCG_MODE) {
4135 g->ops.therm.init_elcg_mode(g, mode_config, 4263 g->ops.therm.init_elcg_mode(g, mode_config,
4136 active_engine_id); 4264 active_engine_id);
4137 else 4265 } else {
4138 nvgpu_err(g, "invalid cg mode %d, config %d for " 4266 nvgpu_err(g, "invalid cg mode %d, config %d for "
4139 "act_eng_id %d", 4267 "act_eng_id %d",
4140 cgmode, mode_config, active_engine_id); 4268 cgmode, mode_config, active_engine_id);
4269 }
4141 } 4270 }
4142 } 4271 }
4143 4272
@@ -4257,8 +4386,9 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
4257 u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc; 4386 u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc;
4258 u32 map_tile_count; 4387 u32 map_tile_count;
4259 4388
4260 if (!gr->map_tiles) 4389 if (!gr->map_tiles) {
4261 return -1; 4390 return -1;
4391 }
4262 4392
4263 if (zcull_alloc_num % 8 != 0) { 4393 if (zcull_alloc_num % 8 != 0) {
4264 /* Total 8 fields per map reg i.e. tile_0 to tile_7*/ 4394 /* Total 8 fields per map reg i.e. tile_0 to tile_7*/
@@ -4288,9 +4418,10 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
4288 zcull_bank_counters[map_tile_count]++; 4418 zcull_bank_counters[map_tile_count]++;
4289 } 4419 }
4290 4420
4291 if (g->ops.gr.program_zcull_mapping) 4421 if (g->ops.gr.program_zcull_mapping) {
4292 g->ops.gr.program_zcull_mapping(g, zcull_alloc_num, 4422 g->ops.gr.program_zcull_mapping(g, zcull_alloc_num,
4293 zcull_map_tiles); 4423 zcull_map_tiles);
4424 }
4294 4425
4295 nvgpu_kfree(g, zcull_map_tiles); 4426 nvgpu_kfree(g, zcull_map_tiles);
4296 nvgpu_kfree(g, zcull_bank_counters); 4427 nvgpu_kfree(g, zcull_bank_counters);
@@ -4307,8 +4438,9 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
4307 return -EINVAL; 4438 return -EINVAL;
4308 } 4439 }
4309 if (gpc_zcull_count != gr->max_zcull_per_gpc_count && 4440 if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
4310 gpc_zcull_count != 0) 4441 gpc_zcull_count != 0) {
4311 floorsweep = true; 4442 floorsweep = true;
4443 }
4312 } 4444 }
4313 4445
4314 /* ceil(1.0f / SM_NUM * gr_gpc0_zcull_sm_num_rcp_conservative__max_v()) */ 4446 /* ceil(1.0f / SM_NUM * gr_gpc0_zcull_sm_num_rcp_conservative__max_v()) */
@@ -4426,8 +4558,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4426 fb_mmu_debug_rd_vol_false_f() | 4558 fb_mmu_debug_rd_vol_false_f() |
4427 fb_mmu_debug_rd_addr_f(addr)); 4559 fb_mmu_debug_rd_addr_f(addr));
4428 4560
4429 if (g->ops.gr.init_gpc_mmu) 4561 if (g->ops.gr.init_gpc_mmu) {
4430 g->ops.gr.init_gpc_mmu(g); 4562 g->ops.gr.init_gpc_mmu(g);
4563 }
4431 4564
4432 /* load gr floorsweeping registers */ 4565 /* load gr floorsweeping registers */
4433 data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r()); 4566 data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r());
@@ -4437,8 +4570,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4437 4570
4438 gr_gk20a_zcull_init_hw(g, gr); 4571 gr_gk20a_zcull_init_hw(g, gr);
4439 4572
4440 if (g->ops.priv_ring.set_ppriv_timeout_settings) 4573 if (g->ops.priv_ring.set_ppriv_timeout_settings) {
4441 g->ops.priv_ring.set_ppriv_timeout_settings(g); 4574 g->ops.priv_ring.set_ppriv_timeout_settings(g);
4575 }
4442 4576
4443 /* enable fifo access */ 4577 /* enable fifo access */
4444 gk20a_writel(g, gr_gpfifo_ctl_r(), 4578 gk20a_writel(g, gr_gpfifo_ctl_r(),
@@ -4458,12 +4592,14 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4458 g->ops.gr.set_hww_esr_report_mask(g); 4592 g->ops.gr.set_hww_esr_report_mask(g);
4459 4593
4460 /* enable TPC exceptions per GPC */ 4594 /* enable TPC exceptions per GPC */
4461 if (g->ops.gr.enable_gpc_exceptions) 4595 if (g->ops.gr.enable_gpc_exceptions) {
4462 g->ops.gr.enable_gpc_exceptions(g); 4596 g->ops.gr.enable_gpc_exceptions(g);
4597 }
4463 4598
4464 /* enable ECC for L1/SM */ 4599 /* enable ECC for L1/SM */
4465 if (g->ops.gr.ecc_init_scrub_reg) 4600 if (g->ops.gr.ecc_init_scrub_reg) {
4466 g->ops.gr.ecc_init_scrub_reg(g); 4601 g->ops.gr.ecc_init_scrub_reg(g);
4602 }
4467 4603
4468 /* TBD: enable per BE exceptions */ 4604 /* TBD: enable per BE exceptions */
4469 4605
@@ -4472,14 +4608,17 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4472 4608
4473 gr_gk20a_load_zbc_table(g, gr); 4609 gr_gk20a_load_zbc_table(g, gr);
4474 4610
4475 if (g->ops.ltc.init_cbc) 4611 if (g->ops.ltc.init_cbc) {
4476 g->ops.ltc.init_cbc(g, gr); 4612 g->ops.ltc.init_cbc(g, gr);
4613 }
4477 4614
4478 if (g->ops.fb.init_cbc) 4615 if (g->ops.fb.init_cbc) {
4479 g->ops.fb.init_cbc(g, gr); 4616 g->ops.fb.init_cbc(g, gr);
4617 }
4480 4618
4481 if (g->ops.gr.disable_rd_coalesce) 4619 if (g->ops.gr.disable_rd_coalesce) {
4482 g->ops.gr.disable_rd_coalesce(g); 4620 g->ops.gr.disable_rd_coalesce(g);
4621 }
4483 4622
4484 /* load ctx init */ 4623 /* load ctx init */
4485 for (i = 0; i < sw_ctx_load->count; i++) { 4624 for (i = 0; i < sw_ctx_load->count; i++) {
@@ -4489,13 +4628,15 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4489 4628
4490 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), 4629 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4491 GR_IDLE_CHECK_DEFAULT); 4630 GR_IDLE_CHECK_DEFAULT);
4492 if (err) 4631 if (err) {
4493 goto out; 4632 goto out;
4633 }
4494 4634
4495 if (g->ops.gr.init_preemption_state) { 4635 if (g->ops.gr.init_preemption_state) {
4496 err = g->ops.gr.init_preemption_state(g); 4636 err = g->ops.gr.init_preemption_state(g);
4497 if (err) 4637 if (err) {
4498 goto out; 4638 goto out;
4639 }
4499 } 4640 }
4500 4641
4501 /* disable fe_go_idle */ 4642 /* disable fe_go_idle */
@@ -4507,13 +4648,15 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4507 4648
4508 /* floorsweep anything left */ 4649 /* floorsweep anything left */
4509 err = g->ops.gr.init_fs_state(g); 4650 err = g->ops.gr.init_fs_state(g);
4510 if (err) 4651 if (err) {
4511 goto out; 4652 goto out;
4653 }
4512 4654
4513 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), 4655 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4514 GR_IDLE_CHECK_DEFAULT); 4656 GR_IDLE_CHECK_DEFAULT);
4515 if (err) 4657 if (err) {
4516 goto restore_fe_go_idle; 4658 goto restore_fe_go_idle;
4659 }
4517 4660
4518 restore_fe_go_idle: 4661 restore_fe_go_idle:
4519 /* restore fe_go_idle */ 4662 /* restore fe_go_idle */
@@ -4521,8 +4664,9 @@ restore_fe_go_idle:
4521 gr_fe_go_idle_timeout_count_prod_f()); 4664 gr_fe_go_idle_timeout_count_prod_f());
4522 4665
4523 if (err || gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), 4666 if (err || gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4524 GR_IDLE_CHECK_DEFAULT)) 4667 GR_IDLE_CHECK_DEFAULT)) {
4525 goto out; 4668 goto out;
4669 }
4526 4670
4527 /* load method init */ 4671 /* load method init */
4528 if (sw_method_init->count) { 4672 if (sw_method_init->count) {
@@ -4556,40 +4700,51 @@ static void gr_gk20a_load_gating_prod(struct gk20a *g)
4556 nvgpu_log_fn(g, " "); 4700 nvgpu_log_fn(g, " ");
4557 4701
4558 /* slcg prod values */ 4702 /* slcg prod values */
4559 if (g->ops.clock_gating.slcg_bus_load_gating_prod) 4703 if (g->ops.clock_gating.slcg_bus_load_gating_prod) {
4560 g->ops.clock_gating.slcg_bus_load_gating_prod(g, 4704 g->ops.clock_gating.slcg_bus_load_gating_prod(g,
4561 g->slcg_enabled); 4705 g->slcg_enabled);
4562 if (g->ops.clock_gating.slcg_chiplet_load_gating_prod) 4706 }
4707 if (g->ops.clock_gating.slcg_chiplet_load_gating_prod) {
4563 g->ops.clock_gating.slcg_chiplet_load_gating_prod(g, 4708 g->ops.clock_gating.slcg_chiplet_load_gating_prod(g,
4564 g->slcg_enabled); 4709 g->slcg_enabled);
4565 if (g->ops.clock_gating.slcg_gr_load_gating_prod) 4710 }
4711 if (g->ops.clock_gating.slcg_gr_load_gating_prod) {
4566 g->ops.clock_gating.slcg_gr_load_gating_prod(g, 4712 g->ops.clock_gating.slcg_gr_load_gating_prod(g,
4567 g->slcg_enabled); 4713 g->slcg_enabled);
4568 if (g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod) 4714 }
4715 if (g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod) {
4569 g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g, 4716 g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g,
4570 g->slcg_enabled); 4717 g->slcg_enabled);
4571 if (g->ops.clock_gating.slcg_perf_load_gating_prod) 4718 }
4719 if (g->ops.clock_gating.slcg_perf_load_gating_prod) {
4572 g->ops.clock_gating.slcg_perf_load_gating_prod(g, 4720 g->ops.clock_gating.slcg_perf_load_gating_prod(g,
4573 g->slcg_enabled); 4721 g->slcg_enabled);
4574 if (g->ops.clock_gating.slcg_xbar_load_gating_prod) 4722 }
4723 if (g->ops.clock_gating.slcg_xbar_load_gating_prod) {
4575 g->ops.clock_gating.slcg_xbar_load_gating_prod(g, 4724 g->ops.clock_gating.slcg_xbar_load_gating_prod(g,
4576 g->slcg_enabled); 4725 g->slcg_enabled);
4726 }
4577 4727
4578 /* blcg prod values */ 4728 /* blcg prod values */
4579 if (g->ops.clock_gating.blcg_bus_load_gating_prod) 4729 if (g->ops.clock_gating.blcg_bus_load_gating_prod) {
4580 g->ops.clock_gating.blcg_bus_load_gating_prod(g, 4730 g->ops.clock_gating.blcg_bus_load_gating_prod(g,
4581 g->blcg_enabled); 4731 g->blcg_enabled);
4582 if (g->ops.clock_gating.blcg_gr_load_gating_prod) 4732 }
4733 if (g->ops.clock_gating.blcg_gr_load_gating_prod) {
4583 g->ops.clock_gating.blcg_gr_load_gating_prod(g, 4734 g->ops.clock_gating.blcg_gr_load_gating_prod(g,
4584 g->blcg_enabled); 4735 g->blcg_enabled);
4585 if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) 4736 }
4737 if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) {
4586 g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, 4738 g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g,
4587 g->blcg_enabled); 4739 g->blcg_enabled);
4588 if (g->ops.clock_gating.blcg_xbar_load_gating_prod) 4740 }
4741 if (g->ops.clock_gating.blcg_xbar_load_gating_prod) {
4589 g->ops.clock_gating.blcg_xbar_load_gating_prod(g, 4742 g->ops.clock_gating.blcg_xbar_load_gating_prod(g,
4590 g->blcg_enabled); 4743 g->blcg_enabled);
4591 if (g->ops.clock_gating.pg_gr_load_gating_prod) 4744 }
4745 if (g->ops.clock_gating.pg_gr_load_gating_prod) {
4592 g->ops.clock_gating.pg_gr_load_gating_prod(g, true); 4746 g->ops.clock_gating.pg_gr_load_gating_prod(g, true);
4747 }
4593 4748
4594 nvgpu_log_fn(g, "done"); 4749 nvgpu_log_fn(g, "done");
4595 } 4750 }
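gr_gk20a_load_gating_prod() above guards each clock-gating callback with a NULL check because not every chip wires up every op. A standalone sketch of that optional-callback pattern; the ops struct below is hypothetical, not the real gpu_ops layout:

#include <stdbool.h>
#include <stddef.h>

struct demo_gating_ops {
        void (*slcg_gr_load)(bool enable);   /* may be NULL on some chips */
        void (*blcg_gr_load)(bool enable);   /* may be NULL on some chips */
};

static void demo_load_gating(const struct demo_gating_ops *ops,
                             bool slcg_enabled, bool blcg_enabled)
{
        /* Each callback is optional per chip; call it only when wired up. */
        if (ops->slcg_gr_load != NULL) {
                ops->slcg_gr_load(slcg_enabled);
        }
        if (ops->blcg_gr_load != NULL) {
                ops->blcg_gr_load(blcg_enabled);
        }
}

Keeping the checks at the call site lets one common routine serve chips that implement only a subset of the callbacks.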
@@ -4624,9 +4779,10 @@ static int gk20a_init_gr_prepare(struct gk20a *g)
4624 4779
4625 if (!g->gr.ctx_vars.valid) { 4780 if (!g->gr.ctx_vars.valid) {
4626 err = gr_gk20a_init_ctx_vars(g, &g->gr); 4781 err = gr_gk20a_init_ctx_vars(g, &g->gr);
4627 if (err) 4782 if (err) {
4628 nvgpu_err(g, 4783 nvgpu_err(g,
4629 "fail to load gr init ctx"); 4784 "fail to load gr init ctx");
4785 }
4630 } 4786 }
4631 return err; 4787 return err;
4632 } 4788 }
@@ -4669,18 +4825,21 @@ static int gr_gk20a_init_ctxsw(struct gk20a *g)
4669 u32 err = 0; 4825 u32 err = 0;
4670 4826
4671 err = g->ops.gr.load_ctxsw_ucode(g); 4827 err = g->ops.gr.load_ctxsw_ucode(g);
4672 if (err) 4828 if (err) {
4673 goto out; 4829 goto out;
4830 }
4674 4831
4675 err = gr_gk20a_wait_ctxsw_ready(g); 4832 err = gr_gk20a_wait_ctxsw_ready(g);
4676 if (err) 4833 if (err) {
4677 goto out; 4834 goto out;
4835 }
4678 4836
4679 out: 4837 out:
4680 if (err) 4838 if (err) {
4681 nvgpu_err(g, "fail"); 4839 nvgpu_err(g, "fail");
4682 else 4840 } else {
4683 nvgpu_log_fn(g, "done"); 4841 nvgpu_log_fn(g, "done");
4842 }
4684 4843
4685 return err; 4844 return err;
4686 } 4845 }
@@ -4703,19 +4862,22 @@ static int gk20a_init_gr_reset_enable_hw(struct gk20a *g)
4703 } 4862 }
4704 4863
4705 err = gr_gk20a_wait_mem_scrubbing(g); 4864 err = gr_gk20a_wait_mem_scrubbing(g);
4706 if (err) 4865 if (err) {
4707 goto out; 4866 goto out;
4867 }
4708 4868
4709 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), 4869 err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
4710 GR_IDLE_CHECK_DEFAULT); 4870 GR_IDLE_CHECK_DEFAULT);
4711 if (err) 4871 if (err) {
4712 goto out; 4872 goto out;
4873 }
4713 4874
4714 out: 4875 out:
4715 if (err) 4876 if (err) {
4716 nvgpu_err(g, "fail"); 4877 nvgpu_err(g, "fail");
4717 else 4878 } else {
4718 nvgpu_log_fn(g, "done"); 4879 nvgpu_log_fn(g, "done");
4880 }
4719 4881
4720 return 0; 4882 return 0;
4721 } 4883 }
@@ -4774,42 +4936,51 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g)
4774 #endif 4936 #endif
4775 4937
4776 err = gr_gk20a_init_gr_config(g, gr); 4938 err = gr_gk20a_init_gr_config(g, gr);
4777 if (err) 4939 if (err) {
4778 goto clean_up; 4940 goto clean_up;
4941 }
4779 4942
4780 err = gr_gk20a_init_mmu_sw(g, gr); 4943 err = gr_gk20a_init_mmu_sw(g, gr);
4781 if (err) 4944 if (err) {
4782 goto clean_up; 4945 goto clean_up;
4946 }
4783 4947
4784 err = gr_gk20a_init_map_tiles(g, gr); 4948 err = gr_gk20a_init_map_tiles(g, gr);
4785 if (err) 4949 if (err) {
4786 goto clean_up; 4950 goto clean_up;
4951 }
4787 4952
4788 if (g->ops.ltc.init_comptags) { 4953 if (g->ops.ltc.init_comptags) {
4789 err = g->ops.ltc.init_comptags(g, gr); 4954 err = g->ops.ltc.init_comptags(g, gr);
4790 if (err) 4955 if (err) {
4791 goto clean_up; 4956 goto clean_up;
4957 }
4792 } 4958 }
4793 4959
4794 err = gr_gk20a_init_zcull(g, gr); 4960 err = gr_gk20a_init_zcull(g, gr);
4795 if (err) 4961 if (err) {
4796 goto clean_up; 4962 goto clean_up;
4963 }
4797 4964
4798 err = g->ops.gr.alloc_global_ctx_buffers(g); 4965 err = g->ops.gr.alloc_global_ctx_buffers(g);
4799 if (err) 4966 if (err) {
4800 goto clean_up; 4967 goto clean_up;
4968 }
4801 4969
4802 err = gr_gk20a_init_access_map(g); 4970 err = gr_gk20a_init_access_map(g);
4803 if (err) 4971 if (err) {
4804 goto clean_up; 4972 goto clean_up;
4973 }
4805 4974
4806 gr_gk20a_load_zbc_default_table(g, gr); 4975 gr_gk20a_load_zbc_default_table(g, gr);
4807 4976
4808 if (g->ops.gr.init_czf_bypass) 4977 if (g->ops.gr.init_czf_bypass) {
4809 g->ops.gr.init_czf_bypass(g); 4978 g->ops.gr.init_czf_bypass(g);
4979 }
4810 4980
4811 if (g->ops.gr.init_gfxp_wfi_timeout_count) 4981 if (g->ops.gr.init_gfxp_wfi_timeout_count) {
4812 g->ops.gr.init_gfxp_wfi_timeout_count(g); 4982 g->ops.gr.init_gfxp_wfi_timeout_count(g);
4983 }
4813 4984
4814 err = nvgpu_mutex_init(&gr->ctx_mutex); 4985 err = nvgpu_mutex_init(&gr->ctx_mutex);
4815 if (err != 0) { 4986 if (err != 0) {
@@ -4823,8 +4994,9 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g)
4823 gr->sw_ready = true; 4994 gr->sw_ready = true;
4824 4995
4825 err = nvgpu_ecc_init_support(g); 4996 err = nvgpu_ecc_init_support(g);
4826 if (err) 4997 if (err) {
4827 goto clean_up; 4998 goto clean_up;
4999 }
4828 5000
4829 nvgpu_log_fn(g, "done"); 5001 nvgpu_log_fn(g, "done");
4830 return 0; 5002 return 0;
@@ -4895,27 +5067,32 @@ int gk20a_init_gr_support(struct gk20a *g)
4895 } 5067 }
4896 5068
4897 err = gr_gk20a_init_ctxsw(g); 5069 err = gr_gk20a_init_ctxsw(g);
4898 if (err) 5070 if (err) {
4899 return err; 5071 return err;
5072 }
4900 5073
4901 /* this appears query for sw states but fecs actually init 5074 /* this appears query for sw states but fecs actually init
4902 ramchain, etc so this is hw init */ 5075 ramchain, etc so this is hw init */
4903 err = g->ops.gr.init_ctx_state(g); 5076 err = g->ops.gr.init_ctx_state(g);
4904 if (err) 5077 if (err) {
4905 return err; 5078 return err;
5079 }
4906 5080
4907 err = gk20a_init_gr_setup_sw(g); 5081 err = gk20a_init_gr_setup_sw(g);
4908 if (err) 5082 if (err) {
4909 return err; 5083 return err;
5084 }
4910 5085
4911 err = gk20a_init_gr_setup_hw(g); 5086 err = gk20a_init_gr_setup_hw(g);
4912 if (err) 5087 if (err) {
4913 return err; 5088 return err;
5089 }
4914 5090
4915 if (g->can_elpg) { 5091 if (g->can_elpg) {
4916 err = gk20a_init_gr_bind_fecs_elpg(g); 5092 err = gk20a_init_gr_bind_fecs_elpg(g);
4917 if (err) 5093 if (err) {
4918 return err; 5094 return err;
5095 }
4919 } 5096 }
4920 5097
4921 gr_gk20a_enable_elcg(g); 5098 gr_gk20a_enable_elcg(g);
@@ -4991,12 +5168,14 @@ int gk20a_enable_gr_hw(struct gk20a *g)
4991 nvgpu_log_fn(g, " "); 5168 nvgpu_log_fn(g, " ");
4992 5169
4993 err = gk20a_init_gr_prepare(g); 5170 err = gk20a_init_gr_prepare(g);
4994 if (err) 5171 if (err) {
4995 return err; 5172 return err;
5173 }
4996 5174
4997 err = gk20a_init_gr_reset_enable_hw(g); 5175 err = gk20a_init_gr_reset_enable_hw(g);
4998 if (err) 5176 if (err) {
4999 return err; 5177 return err;
5178 }
5000 5179
5001 nvgpu_log_fn(g, "done"); 5180 nvgpu_log_fn(g, "done");
5002 5181
@@ -5042,8 +5221,9 @@ int gk20a_gr_reset(struct gk20a *g)
5042 /* this appears query for sw states but fecs actually init 5221 /* this appears query for sw states but fecs actually init
5043 ramchain, etc so this is hw init */ 5222 ramchain, etc so this is hw init */
5044 err = g->ops.gr.init_ctx_state(g); 5223 err = g->ops.gr.init_ctx_state(g);
5045 if (err) 5224 if (err) {
5046 return err; 5225 return err;
5226 }
5047 5227
5048 size = 0; 5228 size = 0;
5049 err = gr_gk20a_fecs_get_reglist_img_size(g, &size); 5229 err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
@@ -5159,8 +5339,9 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
5159 u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r()); 5339 u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
5160 int ret = 0; 5340 int ret = 0;
5161 5341
5162 if (!gr_fecs_intr) 5342 if (!gr_fecs_intr) {
5163 return 0; 5343 return 0;
5344 }
5164 5345
5165 if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) { 5346 if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) {
5166 gk20a_gr_set_error_notifier(g, isr_data, 5347 gk20a_gr_set_error_notifier(g, isr_data,
@@ -5206,9 +5387,10 @@ static int gk20a_gr_handle_class_error(struct gk20a *g,
5206 5387
5207 nvgpu_err(g, "trapped data low 0x%08x", 5388 nvgpu_err(g, "trapped data low 0x%08x",
5208 gk20a_readl(g, gr_trapped_data_lo_r())); 5389 gk20a_readl(g, gr_trapped_data_lo_r()));
5209 if (gr_trapped_addr_datahigh_v(isr_data->addr)) 5390 if (gr_trapped_addr_datahigh_v(isr_data->addr)) {
5210 nvgpu_err(g, "trapped data high 0x%08x", 5391 nvgpu_err(g, "trapped data high 0x%08x",
5211 gk20a_readl(g, gr_trapped_data_hi_r())); 5392 gk20a_readl(g, gr_trapped_data_hi_r()));
5393 }
5212 5394
5213 return -EINVAL; 5395 return -EINVAL;
5214 } 5396 }
@@ -5435,8 +5617,9 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
5435 /* slow path */ 5617 /* slow path */
5436 for (chid = 0; chid < f->num_channels; chid++) { 5618 for (chid = 0; chid < f->num_channels; chid++) {
5437 struct channel_gk20a *ch = &f->channel[chid]; 5619 struct channel_gk20a *ch = &f->channel[chid];
5438 if (!gk20a_channel_get(ch)) 5620 if (!gk20a_channel_get(ch)) {
5439 continue; 5621 continue;
5622 }
5440 5623
5441 if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >> 5624 if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >>
5442 ram_in_base_shift_v()) == 5625 ram_in_base_shift_v()) ==
@@ -5449,8 +5632,9 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
5449 gk20a_channel_put(ch); 5632 gk20a_channel_put(ch);
5450 } 5633 }
5451 5634
5452 if (!ret) 5635 if (!ret) {
5453 goto unlock; 5636 goto unlock;
5637 }
5454 5638
5455 /* add to free tlb entry */ 5639 /* add to free tlb entry */
5456 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { 5640 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
@@ -5473,8 +5657,9 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
5473 5657
5474unlock: 5658unlock:
5475 nvgpu_spinlock_release(&gr->ch_tlb_lock); 5659 nvgpu_spinlock_release(&gr->ch_tlb_lock);
5476 if (curr_tsgid) 5660 if (curr_tsgid) {
5477 *curr_tsgid = tsgid; 5661 *curr_tsgid = tsgid;
5662 }
5478 return ret; 5663 return ret;
5479 } 5664 }
5480 5665
@@ -5507,8 +5692,9 @@ bool gk20a_gr_sm_debugger_attached(struct gk20a *g)
5507 * assumption: all SMs will have debug mode enabled/disabled 5692 * assumption: all SMs will have debug mode enabled/disabled
5508 * uniformly. */ 5693 * uniformly. */
5509 if (gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(dbgr_control0) == 5694 if (gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(dbgr_control0) ==
5510 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v()) 5695 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v()) {
5511 return true; 5696 return true;
5697 }
5512 5698
5513 return false; 5699 return false;
5514 } 5700 }
@@ -5571,8 +5757,9 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
5571 * Do not disable exceptions if the only SM exception is BPT_INT 5757 * Do not disable exceptions if the only SM exception is BPT_INT
5572 */ 5758 */
5573 if ((global_esr == gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) 5759 if ((global_esr == gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f())
5574 && (warp_esr == 0)) 5760 && (warp_esr == 0)) {
5575 disable_sm_exceptions = false; 5761 disable_sm_exceptions = false;
5762 }
5576 5763
5577 if (!ignore_debugger && disable_sm_exceptions) { 5764 if (!ignore_debugger && disable_sm_exceptions) {
5578 u32 tpc_exception_en = gk20a_readl(g, 5765 u32 tpc_exception_en = gk20a_readl(g,
@@ -5661,16 +5848,18 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
5661 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, 5848 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5662 "GPC%d TPC%d: SM exception pending", gpc, tpc); 5849 "GPC%d TPC%d: SM exception pending", gpc, tpc);
5663 5850
5664 if (g->ops.gr.handle_tpc_sm_ecc_exception) 5851 if (g->ops.gr.handle_tpc_sm_ecc_exception) {
5665 g->ops.gr.handle_tpc_sm_ecc_exception(g, gpc, tpc, 5852 g->ops.gr.handle_tpc_sm_ecc_exception(g, gpc, tpc,
5666 post_event, fault_ch, hww_global_esr); 5853 post_event, fault_ch, hww_global_esr);
5854 }
5667 5855
5668 g->ops.gr.get_esr_sm_sel(g, gpc, tpc, &esr_sm_sel); 5856 g->ops.gr.get_esr_sm_sel(g, gpc, tpc, &esr_sm_sel);
5669 5857
5670 for (sm = 0; sm < sm_per_tpc; sm++) { 5858 for (sm = 0; sm < sm_per_tpc; sm++) {
5671 5859
5672 if (!(esr_sm_sel & (1 << sm))) 5860 if (!(esr_sm_sel & (1 << sm))) {
5673 continue; 5861 continue;
5862 }
5674 5863
5675 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, 5864 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5676 "GPC%d TPC%d: SM%d exception pending", 5865 "GPC%d TPC%d: SM%d exception pending",
@@ -5698,9 +5887,10 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
5698 ret |= g->ops.gr.handle_tex_exception(g, gpc, tpc, post_event); 5887 ret |= g->ops.gr.handle_tex_exception(g, gpc, tpc, post_event);
5699 } 5888 }
5700 5889
5701 if (g->ops.gr.handle_tpc_mpc_exception) 5890 if (g->ops.gr.handle_tpc_mpc_exception) {
5702 ret |= g->ops.gr.handle_tpc_mpc_exception(g, 5891 ret |= g->ops.gr.handle_tpc_mpc_exception(g,
5703 gpc, tpc, post_event); 5892 gpc, tpc, post_event);
5893 }
5704 5894
5705 return ret; 5895 return ret;
5706 } 5896 }
@@ -5717,8 +5907,9 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
5717 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, " "); 5907 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, " ");
5718 5908
5719 for (gpc = 0; gpc < gr->gpc_count; gpc++) { 5909 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
5720 if ((exception1 & (1 << gpc)) == 0) 5910 if ((exception1 & (1 << gpc)) == 0) {
5721 continue; 5911 continue;
5912 }
5722 5913
5723 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, 5914 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5724 "GPC%d exception pending", gpc); 5915 "GPC%d exception pending", gpc);
@@ -5731,8 +5922,9 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
5731 /* check if any tpc has an exception */ 5922 /* check if any tpc has an exception */
5732 for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) { 5923 for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
5733 if ((gr_gpc0_gpccs_gpc_exception_tpc_v(gpc_exception) & 5924 if ((gr_gpc0_gpccs_gpc_exception_tpc_v(gpc_exception) &
5734 (1 << tpc)) == 0) 5925 (1 << tpc)) == 0) {
5735 continue; 5926 continue;
5927 }
5736 5928
5737 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, 5929 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5738 "GPC%d: TPC%d exception pending", gpc, tpc); 5930 "GPC%d: TPC%d exception pending", gpc, tpc);
@@ -5776,11 +5968,13 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
5776 static int gk20a_gr_post_bpt_events(struct gk20a *g, struct tsg_gk20a *tsg, 5968 static int gk20a_gr_post_bpt_events(struct gk20a *g, struct tsg_gk20a *tsg,
5777 u32 global_esr) 5969 u32 global_esr)
5778 { 5970 {
5779 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) 5971 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) {
5780 g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_INT); 5972 g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_INT);
5973 }
5781 5974
5782 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f()) 5975 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f()) {
5783 g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_PAUSE); 5976 g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_PAUSE);
5977 }
5784 5978
5785 return 0; 5979 return 0;
5786 } 5980 }
@@ -5802,12 +5996,14 @@ int gk20a_gr_isr(struct gk20a *g)
5802 nvgpu_log_fn(g, " "); 5996 nvgpu_log_fn(g, " ");
5803 nvgpu_log(g, gpu_dbg_intr, "pgraph intr %08x", gr_intr); 5997 nvgpu_log(g, gpu_dbg_intr, "pgraph intr %08x", gr_intr);
5804 5998
5805 if (!gr_intr) 5999 if (!gr_intr) {
5806 return 0; 6000 return 0;
6001 }
5807 6002
5808 gr_engine_id = gk20a_fifo_get_gr_engine_id(g); 6003 gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
5809 if (gr_engine_id != FIFO_INVAL_ENGINE_ID) 6004 if (gr_engine_id != FIFO_INVAL_ENGINE_ID) {
5810 gr_engine_id = BIT(gr_engine_id); 6005 gr_engine_id = BIT(gr_engine_id);
6006 }
5811 6007
5812 grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r()); 6008 grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
5813 grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1); 6009 grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
@@ -5835,8 +6031,9 @@ int gk20a_gr_isr(struct gk20a *g)
5835 nvgpu_err(g, "ch id is INVALID 0xffffffff"); 6031 nvgpu_err(g, "ch id is INVALID 0xffffffff");
5836 } 6032 }
5837 6033
5838 if (ch && gk20a_is_channel_marked_as_tsg(ch)) 6034 if (ch && gk20a_is_channel_marked_as_tsg(ch)) {
5839 tsg = &g->fifo.tsg[ch->tsgid]; 6035 tsg = &g->fifo.tsg[ch->tsgid];
6036 }
5840 6037
5841 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, 6038 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5842 "channel %d: addr 0x%08x, " 6039 "channel %d: addr 0x%08x, "
@@ -6047,18 +6244,19 @@ int gk20a_gr_isr(struct gk20a *g)
6047 } 6244 }
6048 6245
6049 if (need_reset) { 6246 if (need_reset) {
6050 if (tsgid != NVGPU_INVALID_TSG_ID) 6247 if (tsgid != NVGPU_INVALID_TSG_ID) {
6051 gk20a_fifo_recover(g, gr_engine_id, 6248 gk20a_fifo_recover(g, gr_engine_id,
6052 tsgid, true, true, true, 6249 tsgid, true, true, true,
6053 RC_TYPE_GR_FAULT); 6250 RC_TYPE_GR_FAULT);
6054 else if (ch) 6251 } else if (ch) {
6055 gk20a_fifo_recover(g, gr_engine_id, 6252 gk20a_fifo_recover(g, gr_engine_id,
6056 ch->chid, false, true, true, 6253 ch->chid, false, true, true,
6057 RC_TYPE_GR_FAULT); 6254 RC_TYPE_GR_FAULT);
6058 else 6255 } else {
6059 gk20a_fifo_recover(g, gr_engine_id, 6256 gk20a_fifo_recover(g, gr_engine_id,
6060 0, false, false, true, 6257 0, false, false, true,
6061 RC_TYPE_GR_FAULT); 6258 RC_TYPE_GR_FAULT);
6259 }
6062 } 6260 }
6063 6261
6064 if (gr_intr && !ch) { 6262 if (gr_intr && !ch) {
@@ -6075,16 +6273,19 @@ int gk20a_gr_isr(struct gk20a *g)
6075 grfifo_ctl | gr_gpfifo_ctl_access_f(1) | 6273 grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
6076 gr_gpfifo_ctl_semaphore_access_f(1)); 6274 gr_gpfifo_ctl_semaphore_access_f(1));
6077 6275
6078 if (gr_intr) 6276 if (gr_intr) {
6079 nvgpu_err(g, 6277 nvgpu_err(g,
6080 "unhandled gr interrupt 0x%08x", gr_intr); 6278 "unhandled gr interrupt 0x%08x", gr_intr);
6279 }
6081 6280
6082 /* Posting of BPT events should be the last thing in this function */ 6281 /* Posting of BPT events should be the last thing in this function */
6083 if (global_esr && tsg) 6282 if (global_esr && tsg) {
6084 gk20a_gr_post_bpt_events(g, tsg, global_esr); 6283 gk20a_gr_post_bpt_events(g, tsg, global_esr);
6284 }
6085 6285
6086 if (ch) 6286 if (ch) {
6087 gk20a_channel_put(ch); 6287 gk20a_channel_put(ch);
6288 }
6088 6289
6089 return 0; 6290 return 0;
6090 } 6291 }
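In the gk20a_gr_isr() hunks above, the recovery call picks its target in three tiers: a valid TSG id recovers the whole TSG, otherwise a bound channel recovers just that channel, otherwise only the engine is reset. A reduced sketch of that selection; the types and names below are hypothetical, and the real code passes additional flags to gk20a_fifo_recover():

#include <stdbool.h>

#define DEMO_INVALID_TSG_ID 0xFFFFFFFFu

enum demo_recovery_target {
        DEMO_RECOVER_TSG,          /* fault attributed to a TSG */
        DEMO_RECOVER_CHANNEL,      /* bare channel, not in a TSG */
        DEMO_RECOVER_ENGINE_ONLY,  /* no owner identified */
};

static enum demo_recovery_target
demo_pick_recovery_target(unsigned int tsgid, bool have_channel)
{
        if (tsgid != DEMO_INVALID_TSG_ID) {
                return DEMO_RECOVER_TSG;
        } else if (have_channel) {
                return DEMO_RECOVER_CHANNEL;
        } else {
                return DEMO_RECOVER_ENGINE_ONLY;
        }
}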
@@ -6166,8 +6367,9 @@ int gk20a_gr_suspend(struct gk20a *g)
6166 6367
6167 ret = g->ops.gr.wait_empty(g, gk20a_get_gr_idle_timeout(g), 6368 ret = g->ops.gr.wait_empty(g, gk20a_get_gr_idle_timeout(g),
6168 GR_IDLE_CHECK_DEFAULT); 6369 GR_IDLE_CHECK_DEFAULT);
6169 if (ret) 6370 if (ret) {
6170 return ret; 6371 return ret;
6372 }
6171 6373
6172 gk20a_writel(g, gr_gpfifo_ctl_r(), 6374 gk20a_writel(g, gr_gpfifo_ctl_r(),
6173 gr_gpfifo_ctl_access_disabled_f()); 6375 gr_gpfifo_ctl_access_disabled_f());
@@ -6227,8 +6429,9 @@ int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
6227 if (pri_is_gpc_addr_shared(g, addr)) { 6429 if (pri_is_gpc_addr_shared(g, addr)) {
6228 *addr_type = CTXSW_ADDR_TYPE_GPC; 6430 *addr_type = CTXSW_ADDR_TYPE_GPC;
6229 *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC; 6431 *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC;
6230 } else 6432 } else {
6231 *gpc_num = pri_get_gpc_num(g, addr); 6433 *gpc_num = pri_get_gpc_num(g, addr);
6434 }
6232 6435
6233 if (pri_is_ppc_addr(g, gpc_addr)) { 6436 if (pri_is_ppc_addr(g, gpc_addr)) {
6234 *addr_type = CTXSW_ADDR_TYPE_PPC; 6437 *addr_type = CTXSW_ADDR_TYPE_PPC;
@@ -6256,10 +6459,11 @@ int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
6256 return 0; 6459 return 0;
6257 } else if (g->ops.ltc.pri_is_ltc_addr(g, addr)) { 6460 } else if (g->ops.ltc.pri_is_ltc_addr(g, addr)) {
6258 *addr_type = CTXSW_ADDR_TYPE_LTCS; 6461 *addr_type = CTXSW_ADDR_TYPE_LTCS;
6259 if (g->ops.ltc.is_ltcs_ltss_addr(g, addr)) 6462 if (g->ops.ltc.is_ltcs_ltss_addr(g, addr)) {
6260 *broadcast_flags |= PRI_BROADCAST_FLAGS_LTCS; 6463 *broadcast_flags |= PRI_BROADCAST_FLAGS_LTCS;
6261 else if (g->ops.ltc.is_ltcn_ltss_addr(g, addr)) 6464 } else if (g->ops.ltc.is_ltcn_ltss_addr(g, addr)) {
6262 *broadcast_flags |= PRI_BROADCAST_FLAGS_LTSS; 6465 *broadcast_flags |= PRI_BROADCAST_FLAGS_LTSS;
6466 }
6263 return 0; 6467 return 0;
6264 } else if (pri_is_fbpa_addr(g, addr)) { 6468 } else if (pri_is_fbpa_addr(g, addr)) {
6265 *addr_type = CTXSW_ADDR_TYPE_FBPA; 6469 *addr_type = CTXSW_ADDR_TYPE_FBPA;
@@ -6338,8 +6542,9 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g,
6338 &gpc_num, &tpc_num, &ppc_num, &be_num, 6542 &gpc_num, &tpc_num, &ppc_num, &be_num,
6339 &broadcast_flags); 6543 &broadcast_flags);
6340 nvgpu_log(g, gpu_dbg_gpu_dbg, "addr_type = %d", addr_type); 6544 nvgpu_log(g, gpu_dbg_gpu_dbg, "addr_type = %d", addr_type);
6341 if (err) 6545 if (err) {
6342 return err; 6546 return err;
6547 }
6343 6548
6344 if ((addr_type == CTXSW_ADDR_TYPE_SYS) || 6549 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
6345 (addr_type == CTXSW_ADDR_TYPE_BE)) { 6550 (addr_type == CTXSW_ADDR_TYPE_BE)) {
@@ -6347,10 +6552,11 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g,
6347 * table. Convert a BE unicast address to a broadcast address 6552 * table. Convert a BE unicast address to a broadcast address
6348 * so that we can look up the offset. */ 6553 * so that we can look up the offset. */
6349 if ((addr_type == CTXSW_ADDR_TYPE_BE) && 6554 if ((addr_type == CTXSW_ADDR_TYPE_BE) &&
6350 !(broadcast_flags & PRI_BROADCAST_FLAGS_BE)) 6555 !(broadcast_flags & PRI_BROADCAST_FLAGS_BE)) {
6351 priv_addr_table[t++] = pri_be_shared_addr(g, addr); 6556 priv_addr_table[t++] = pri_be_shared_addr(g, addr);
6352 else 6557 } else {
6353 priv_addr_table[t++] = addr; 6558 priv_addr_table[t++] = addr;
6559 }
6354 6560
6355 *num_registers = t; 6561 *num_registers = t;
6356 return 0; 6562 return 0;
@@ -6362,7 +6568,7 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g,
6362 if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) { 6568 if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) {
6363 for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { 6569 for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
6364 6570
6365 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) 6571 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) {
6366 for (tpc_num = 0; 6572 for (tpc_num = 0;
6367 tpc_num < g->gr.gpc_tpc_count[gpc_num]; 6573 tpc_num < g->gr.gpc_tpc_count[gpc_num];
6368 tpc_num++) { 6574 tpc_num++) {
@@ -6371,11 +6577,12 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g,
6371 gpc_num, tpc_num); 6577 gpc_num, tpc_num);
6372 } 6578 }
6373 6579
6374 else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) { 6580 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) {
6375 err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num, 6581 err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num,
6376 priv_addr_table, &t); 6582 priv_addr_table, &t);
6377 if (err) 6583 if (err) {
6378 return err; 6584 return err;
6585 }
6379 } else { 6586 } else {
6380 priv_addr = pri_gpc_addr(g, 6587 priv_addr = pri_gpc_addr(g,
6381 pri_gpccs_addr_mask(addr), 6588 pri_gpccs_addr_mask(addr),
@@ -6383,8 +6590,9 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g,
6383 6590
6384 gpc_addr = pri_gpccs_addr_mask(priv_addr); 6591 gpc_addr = pri_gpccs_addr_mask(priv_addr);
6385 tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); 6592 tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
6386 if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) 6593 if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) {
6387 continue; 6594 continue;
6595 }
6388 6596
6389 priv_addr_table[t++] = priv_addr; 6597 priv_addr_table[t++] = priv_addr;
6390 } 6598 }
@@ -6406,7 +6614,7 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g,
6406 nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS), 6614 nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS),
6407 priv_addr_table, &t); 6615 priv_addr_table, &t);
6408 } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) { 6616 } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) {
6409 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) 6617 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) {
6410 for (tpc_num = 0; 6618 for (tpc_num = 0;
6411 tpc_num < g->gr.gpc_tpc_count[gpc_num]; 6619 tpc_num < g->gr.gpc_tpc_count[gpc_num];
6412 tpc_num++) { 6620 tpc_num++) {
@@ -6414,11 +6622,12 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g,
6414 pri_tpc_addr(g, pri_tpccs_addr_mask(addr), 6622 pri_tpc_addr(g, pri_tpccs_addr_mask(addr),
6415 gpc_num, tpc_num); 6623 gpc_num, tpc_num);
6416 } 6624 }
6417 else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) 6625 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) {
6418 err = gr_gk20a_split_ppc_broadcast_addr(g, 6626 err = gr_gk20a_split_ppc_broadcast_addr(g,
6419 addr, gpc_num, priv_addr_table, &t); 6627 addr, gpc_num, priv_addr_table, &t);
6420 else 6628 } else {
6421 priv_addr_table[t++] = addr; 6629 priv_addr_table[t++] = addr;
6630 }
6422 } 6631 }
6423 6632
6424 *num_registers = t; 6633 *num_registers = t;
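The gr_gk20a_create_priv_addr_table() hunks above expand a broadcast PRI address into one table entry per GPC/TPC/PPC unit, or pass a unicast address through unchanged. A rough standalone sketch of that expansion; the real address math lives in the pri_*() helpers, and the base/stride parameters here are hypothetical:

/* Expand one broadcast register offset into per-unit unicast addresses.
 * Returns the number of table entries written.
 */
static unsigned int demo_expand_broadcast(unsigned int reg_offset,
                                          unsigned int unit_base,
                                          unsigned int unit_stride,
                                          unsigned int unit_count,
                                          unsigned int *table,
                                          unsigned int max_entries)
{
        unsigned int unit, t = 0;

        for (unit = 0; unit < unit_count && t < max_entries; unit++) {
                /* one unicast register address per physical unit */
                table[t++] = unit_base + (unit * unit_stride) + reg_offset;
        }
        return t;
}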
@@ -6450,8 +6659,9 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
6450 return -EINVAL; 6659 return -EINVAL;
6451 } 6660 }
6452 6661
6453 if (!g->gr.ctx_vars.golden_image_initialized) 6662 if (!g->gr.ctx_vars.golden_image_initialized) {
6454 return -ENODEV; 6663 return -ENODEV;
6664 }
6455 6665
6456 priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets); 6666 priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets);
6457 if (!priv_registers) { 6667 if (!priv_registers) {
@@ -6473,8 +6683,9 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
6473 goto cleanup; 6683 goto cleanup;
6474 } 6684 }
6475 6685
6476 if ((max_offsets == 1) && (num_registers > 1)) 6686 if ((max_offsets == 1) && (num_registers > 1)) {
6477 num_registers = 1; 6687 num_registers = 1;
6688 }
6478 6689
6479 if (!g->gr.ctx_vars.local_golden_image) { 6690 if (!g->gr.ctx_vars.local_golden_image) {
6480 nvgpu_log_fn(g, "no context switch header info to work with"); 6691 nvgpu_log_fn(g, "no context switch header info to work with");
@@ -6501,8 +6712,9 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
6501 6712
6502 *num_offsets = num_registers; 6713 *num_offsets = num_registers;
6503cleanup: 6714cleanup:
6504 if (!IS_ERR_OR_NULL(priv_registers)) 6715 if (!IS_ERR_OR_NULL(priv_registers)) {
6505 nvgpu_kfree(g, priv_registers); 6716 nvgpu_kfree(g, priv_registers);
6717 }
6506 6718
6507 return err; 6719 return err;
6508 } 6720 }
@@ -6526,11 +6738,13 @@ int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
6526 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); 6738 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6527 6739
6528 /* implementation is crossed-up if either of these happen */ 6740 /* implementation is crossed-up if either of these happen */
6529 if (max_offsets > potential_offsets) 6741 if (max_offsets > potential_offsets) {
6530 return -EINVAL; 6742 return -EINVAL;
6743 }
6531 6744
6532 if (!g->gr.ctx_vars.golden_image_initialized) 6745 if (!g->gr.ctx_vars.golden_image_initialized) {
6533 return -ENODEV; 6746 return -ENODEV;
6747 }
6534 6748
6535 priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets); 6749 priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets);
6536 if (!priv_registers) { 6750 if (!priv_registers) {
@@ -6549,8 +6763,9 @@ int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
6549 goto cleanup; 6763 goto cleanup;
6550 } 6764 }
6551 6765
6552 if ((max_offsets == 1) && (num_registers > 1)) 6766 if ((max_offsets == 1) && (num_registers > 1)) {
6553 num_registers = 1; 6767 num_registers = 1;
6768 }
6554 6769
6555 if (!g->gr.ctx_vars.local_golden_image) { 6770 if (!g->gr.ctx_vars.local_golden_image) {
6556 nvgpu_log_fn(g, "no context switch header info to work with"); 6771 nvgpu_log_fn(g, "no context switch header info to work with");
@@ -6591,8 +6806,9 @@ static u32 _ovr_perf_regs[17] = { 0, };
6591 6806
6592 void gk20a_gr_init_ovr_sm_dsm_perf(void) 6807 void gk20a_gr_init_ovr_sm_dsm_perf(void)
6593 { 6808 {
6594 if (_ovr_perf_regs[0] != 0) 6809 if (_ovr_perf_regs[0] != 0) {
6595 return; 6810 return;
6811 }
6596 6812
6597 _ovr_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r(); 6813 _ovr_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r();
6598 _ovr_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r(); 6814 _ovr_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r();
@@ -6640,8 +6856,9 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6640 struct nvgpu_mem *ctxheader = &ch->ctx_header; 6856 struct nvgpu_mem *ctxheader = &ch->ctx_header;
6641 6857
6642 tsg = tsg_gk20a_from_ch(ch); 6858 tsg = tsg_gk20a_from_ch(ch);
6643 if (!tsg) 6859 if (!tsg) {
6644 return -EINVAL; 6860 return -EINVAL;
6861 }
6645 6862
6646 gr_ctx = &tsg->gr_ctx; 6863 gr_ctx = &tsg->gr_ctx;
6647 g->ops.gr.init_ovr_sm_dsm_perf(); 6864 g->ops.gr.init_ovr_sm_dsm_perf();
@@ -6657,16 +6874,18 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6657 chk_addr = ((gpc_stride * gpc) + 6874 chk_addr = ((gpc_stride * gpc) +
6658 (tpc_in_gpc_stride * tpc) + 6875 (tpc_in_gpc_stride * tpc) +
6659 ovr_perf_regs[reg]); 6876 ovr_perf_regs[reg]);
6660 if (chk_addr != addr) 6877 if (chk_addr != addr) {
6661 continue; 6878 continue;
6879 }
6662 /* reset the patch count from previous 6880 /* reset the patch count from previous
6663 runs,if ucode has already processed 6881 runs,if ucode has already processed
6664 it */ 6882 it */
6665 tmp = nvgpu_mem_rd(g, mem, 6883 tmp = nvgpu_mem_rd(g, mem,
6666 ctxsw_prog_main_image_patch_count_o()); 6884 ctxsw_prog_main_image_patch_count_o());
6667 6885
6668 if (!tmp) 6886 if (!tmp) {
6669 gr_ctx->patch_ctx.data_count = 0; 6887 gr_ctx->patch_ctx.data_count = 0;
6888 }
6670 6889
6671 gr_gk20a_ctx_patch_write(g, gr_ctx, 6890 gr_gk20a_ctx_patch_write(g, gr_ctx,
6672 addr, data, true); 6891 addr, data, true);
@@ -6770,10 +6989,11 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6770 u32 gpc_addr = 0; 6989 u32 gpc_addr = 0;
6771 gpc_num = pri_get_gpc_num(g, addr); 6990 gpc_num = pri_get_gpc_num(g, addr);
6772 gpc_addr = pri_gpccs_addr_mask(addr); 6991 gpc_addr = pri_gpccs_addr_mask(addr);
6773 if (g->ops.gr.is_tpc_addr(g, gpc_addr)) 6992 if (g->ops.gr.is_tpc_addr(g, gpc_addr)) {
6774 tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); 6993 tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
6775 else 6994 } else {
6776 return -EINVAL; 6995 return -EINVAL;
6996 }
6777 6997
6778 nvgpu_log_info(g, " gpc = %d tpc = %d", 6998 nvgpu_log_info(g, " gpc = %d tpc = %d",
6779 gpc_num, tpc_num); 6999 gpc_num, tpc_num);
@@ -6896,8 +7116,9 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6896 } 7116 }
6897 7117
6898 if ((ILLEGAL_ID == sm_dsm_perf_ctrl_reg_id) && 7118 if ((ILLEGAL_ID == sm_dsm_perf_ctrl_reg_id) &&
6899 (ILLEGAL_ID == sm_dsm_perf_reg_id)) 7119 (ILLEGAL_ID == sm_dsm_perf_reg_id)) {
6900 return -EINVAL; 7120 return -EINVAL;
7121 }
6901 7122
6902 /* Skip the FECS extended header, nothing there for us now. */ 7123 /* Skip the FECS extended header, nothing there for us now. */
6903 offset_to_segment += buffer_segments_size; 7124 offset_to_segment += buffer_segments_size;
@@ -6986,8 +7207,9 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
6986 7207
6987 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr); 7208 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr);
6988 7209
6989 if (!g->gr.ctx_vars.valid) 7210 if (!g->gr.ctx_vars.valid) {
6990 return -EINVAL; 7211 return -EINVAL;
7212 }
6991 7213
6992 /* Process the SYS/BE segment. */ 7214 /* Process the SYS/BE segment. */
6993 if ((addr_type == CTXSW_ADDR_TYPE_SYS) || 7215 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
@@ -7032,8 +7254,9 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
7032 } 7254 }
7033 } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) || 7255 } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
7034 (addr_type == CTXSW_ADDR_TYPE_ETPC)) { 7256 (addr_type == CTXSW_ADDR_TYPE_ETPC)) {
7035 if (!(g->ops.gr.get_egpc_base)) 7257 if (!(g->ops.gr.get_egpc_base)) {
7036 return -EINVAL; 7258 return -EINVAL;
7259 }
7037 7260
7038 for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) { 7261 for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
7039 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.etpc.count; i++) { 7262 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.etpc.count; i++) {
@@ -7130,8 +7353,9 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
7130 */ 7353 */
7131 if ((!g->gr.ctx_vars.valid) || 7354 if ((!g->gr.ctx_vars.valid) ||
7132 ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) && 7355 ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) &&
7133 (num_pes_per_gpc > 1))) 7356 (num_pes_per_gpc > 1))) {
7134 return -EINVAL; 7357 return -EINVAL;
7358 }
7135 7359
7136 data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o()); 7360 data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o());
7137 7361
@@ -7237,8 +7461,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7237 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, 7461 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7238 "addr_type = %d, broadcast_flags: %08x", 7462 "addr_type = %d, broadcast_flags: %08x",
7239 addr_type, broadcast_flags); 7463 addr_type, broadcast_flags);
7240 if (err) 7464 if (err) {
7241 return err; 7465 return err;
7466 }
7242 7467
7243 context = (u8 *)context_buffer; 7468 context = (u8 *)context_buffer;
7244 if (!check_main_image_header_magic(context)) { 7469 if (!check_main_image_header_magic(context)) {
@@ -7283,8 +7508,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7283 addr_type, addr, 7508 addr_type, addr,
7284 0, 0, 0, 0, 7509 0, 0, 0, 0,
7285 &offset); 7510 &offset);
7286 if (err) 7511 if (err) {
7287 return err; 7512 return err;
7513 }
7288 7514
7289 *priv_offset = (offset_to_segment + offset); 7515 *priv_offset = (offset_to_segment + offset);
7290 return 0; 7516 return 0;
@@ -7339,8 +7565,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7339 addr_type, 7565 addr_type,
7340 num_tpcs, num_ppcs, reg_list_ppc_count, 7566 num_tpcs, num_ppcs, reg_list_ppc_count,
7341 &offset_in_segment); 7567 &offset_in_segment);
7342 if (err) 7568 if (err) {
7343 return -EINVAL; 7569 return -EINVAL;
7570 }
7344 7571
7345 offset_to_segment += offset_in_segment; 7572 offset_to_segment += offset_in_segment;
7346 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, 7573 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
@@ -7352,8 +7579,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7352 i, num_tpcs, 7579 i, num_tpcs,
7353 num_ppcs, ppc_mask, 7580 num_ppcs, ppc_mask,
7354 &offset); 7581 &offset);
7355 if (err) 7582 if (err) {
7356 return -EINVAL; 7583 return -EINVAL;
7584 }
7357 7585
7358 *priv_offset = offset_to_segment + offset; 7586 *priv_offset = offset_to_segment + offset;
7359 return 0; 7587 return 0;
@@ -7370,11 +7598,13 @@ static int map_cmp(const void *a, const void *b)
7370 struct ctxsw_buf_offset_map_entry *e2 = 7598 struct ctxsw_buf_offset_map_entry *e2 =
7371 (struct ctxsw_buf_offset_map_entry *)b; 7599 (struct ctxsw_buf_offset_map_entry *)b;
7372 7600
7373 if (e1->addr < e2->addr) 7601 if (e1->addr < e2->addr) {
7374 return -1; 7602 return -1;
7603 }
7375 7604
7376 if (e1->addr > e2->addr) 7605 if (e1->addr > e2->addr) {
7377 return 1; 7606 return 1;
7607 }
7378 return 0; 7608 return 0;
7379 } 7609 }
7380 7610
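map_cmp() above is a three-way comparator ordering map entries by register address, presumably so the finished table can be sorted and then searched by address. Here is how such a comparator is typically consumed, shown with libc qsort()/bsearch() and a hypothetical entry type; the driver uses its own sort helper and struct ctxsw_buf_offset_map_entry:

#include <stdlib.h>

struct demo_map_entry {
        unsigned int addr;    /* register address */
        unsigned int offset;  /* offset in the ctxsw buffer */
};

static int demo_map_cmp(const void *a, const void *b)
{
        const struct demo_map_entry *e1 = a;
        const struct demo_map_entry *e2 = b;

        if (e1->addr < e2->addr) {
                return -1;
        }
        if (e1->addr > e2->addr) {
                return 1;
        }
        return 0;
}

/* In practice the table is sorted once after it is built and searched many
 * times; both steps are shown together here for brevity.
 */
static const struct demo_map_entry *
demo_sort_and_find(struct demo_map_entry *map, size_t count, unsigned int addr)
{
        struct demo_map_entry key = { .addr = addr, .offset = 0 };

        qsort(map, count, sizeof(*map), demo_map_cmp);
        return bsearch(&key, map, count, sizeof(*map), demo_map_cmp);
}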
@@ -7387,15 +7617,17 @@ static int add_ctxsw_buffer_map_entries_pmsys(struct ctxsw_buf_offset_map_entry
7387 u32 cnt = *count; 7617 u32 cnt = *count;
7388 u32 off = *offset; 7618 u32 off = *offset;
7389 7619
7390 if ((cnt + regs->count) > max_cnt) 7620 if ((cnt + regs->count) > max_cnt) {
7391 return -EINVAL; 7621 return -EINVAL;
7622 }
7392 7623
7393 for (idx = 0; idx < regs->count; idx++) { 7624 for (idx = 0; idx < regs->count; idx++) {
7394 if ((base + (regs->l[idx].addr & mask)) < 0xFFF) 7625 if ((base + (regs->l[idx].addr & mask)) < 0xFFF) {
7395 map[cnt].addr = base + (regs->l[idx].addr & mask) 7626 map[cnt].addr = base + (regs->l[idx].addr & mask)
7396 + NV_PCFG_BASE; 7627 + NV_PCFG_BASE;
7397 else 7628 } else {
7398 map[cnt].addr = base + (regs->l[idx].addr & mask); 7629 map[cnt].addr = base + (regs->l[idx].addr & mask);
7630 }
7399 map[cnt++].offset = off; 7631 map[cnt++].offset = off;
7400 off += 4; 7632 off += 4;
7401 } 7633 }
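add_ctxsw_buffer_map_entries_pmsys() above walks a register list and appends (address, buffer offset) pairs, advancing the offset by 4 bytes per register and relocating low addresses under NV_PCFG_BASE. A simplified, self-contained sketch of that builder; the types are hypothetical stand-ins, the NV_PCFG_BASE special case is omitted, and the final write-back of count/offset is inferred rather than shown in this hunk:

struct demo_map_slot {
        unsigned int addr;
        unsigned int offset;
};

/* Append one (addr, offset) pair per register; 4 bytes of save area each.
 * Returns 0 on success, -1 if the map would overflow.
 */
static int demo_add_map_entries(struct demo_map_slot *map,
                                const unsigned int *regs, unsigned int nregs,
                                unsigned int *count, unsigned int *offset,
                                unsigned int max_cnt, unsigned int base,
                                unsigned int mask)
{
        unsigned int idx, cnt = *count, off = *offset;

        if ((cnt + nregs) > max_cnt) {
                return -1;
        }
        for (idx = 0; idx < nregs; idx++) {
                map[cnt].addr = base + (regs[idx] & mask);
                map[cnt++].offset = off;
                off += 4U;
        }
        *count = cnt;
        *offset = off;
        return 0;
}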
@@ -7414,8 +7646,9 @@ static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g,
7414 u32 cnt = *count; 7646 u32 cnt = *count;
7415 u32 off = *offset; 7647 u32 off = *offset;
7416 7648
7417 if ((cnt + regs->count) > max_cnt) 7649 if ((cnt + regs->count) > max_cnt) {
7418 return -EINVAL; 7650 return -EINVAL;
7651 }
7419 7652
7420 /* NOTE: The PPC offsets get added to the pm_gpc list if numPpc <= 1 7653 /* NOTE: The PPC offsets get added to the pm_gpc list if numPpc <= 1
7421 * To handle the case of PPC registers getting added into GPC, the below 7654 * To handle the case of PPC registers getting added into GPC, the below
@@ -7434,8 +7667,9 @@ static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g,
7434 7667
7435 map[cnt].addr = base + ppc_in_gpc_base 7668 map[cnt].addr = base + ppc_in_gpc_base
7436 + (regs->l[idx].addr & ppcmask); 7669 + (regs->l[idx].addr & ppcmask);
7437 } else 7670 } else {
7438 map[cnt].addr = base + (regs->l[idx].addr & mask); 7671 map[cnt].addr = base + (regs->l[idx].addr & mask);
7672 }
7439 map[cnt++].offset = off; 7673 map[cnt++].offset = off;
7440 off += 4; 7674 off += 4;
7441 } 7675 }
@@ -7453,8 +7687,9 @@ static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map,
7453 u32 cnt = *count; 7687 u32 cnt = *count;
7454 u32 off = *offset; 7688 u32 off = *offset;
7455 7689
7456 if ((cnt + regs->count) > max_cnt) 7690 if ((cnt + regs->count) > max_cnt) {
7457 return -EINVAL; 7691 return -EINVAL;
7692 }
7458 7693
7459 for (idx = 0; idx < regs->count; idx++) { 7694 for (idx = 0; idx < regs->count; idx++) {
7460 map[cnt].addr = base + (regs->l[idx].addr & mask); 7695 map[cnt].addr = base + (regs->l[idx].addr & mask);
@@ -7481,8 +7716,9 @@ static int add_ctxsw_buffer_map_entries_subunits(
7481 u32 cnt = *count; 7716 u32 cnt = *count;
7482 u32 off = *offset; 7717 u32 off = *offset;
7483 7718
7484 if ((cnt + (regs->count * num_units)) > max_cnt) 7719 if ((cnt + (regs->count * num_units)) > max_cnt) {
7485 return -EINVAL; 7720 return -EINVAL;
7721 }
7486 7722
7487 /* Data is interleaved for units in ctxsw buffer */ 7723 /* Data is interleaved for units in ctxsw buffer */
7488 for (idx = 0; idx < regs->count; idx++) { 7724 for (idx = 0; idx < regs->count; idx++) {
@@ -7529,8 +7765,9 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
7529 &g->gr.ctx_vars.ctxsw_regs.pm_tpc, 7765 &g->gr.ctx_vars.ctxsw_regs.pm_tpc,
7530 count, offset, max_cnt, base, num_tpcs, 7766 count, offset, max_cnt, base, num_tpcs,
7531 tpc_in_gpc_stride, 7767 tpc_in_gpc_stride,
7532 (tpc_in_gpc_stride - 1))) 7768 (tpc_in_gpc_stride - 1))) {
7533 return -EINVAL; 7769 return -EINVAL;
7770 }
7534 7771
7535 num_ppcs = g->gr.gpc_ppc_count[gpc_num]; 7772 num_ppcs = g->gr.gpc_ppc_count[gpc_num];
7536 base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base; 7773 base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base;
@@ -7538,33 +7775,38 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
7538 &g->gr.ctx_vars.ctxsw_regs.pm_ppc, 7775 &g->gr.ctx_vars.ctxsw_regs.pm_ppc,
7539 count, offset, max_cnt, base, num_ppcs, 7776 count, offset, max_cnt, base, num_ppcs,
7540 ppc_in_gpc_stride, 7777 ppc_in_gpc_stride,
7541 (ppc_in_gpc_stride - 1))) 7778 (ppc_in_gpc_stride - 1))) {
7542 return -EINVAL; 7779 return -EINVAL;
7780 }
7543 7781
7544 base = gpc_base + (gpc_stride * gpc_num); 7782 base = gpc_base + (gpc_stride * gpc_num);
7545 if (add_ctxsw_buffer_map_entries_pmgpc(g, map, 7783 if (add_ctxsw_buffer_map_entries_pmgpc(g, map,
7546 &g->gr.ctx_vars.ctxsw_regs.pm_gpc, 7784 &g->gr.ctx_vars.ctxsw_regs.pm_gpc,
7547 count, offset, max_cnt, base, 7785 count, offset, max_cnt, base,
7548 (gpc_stride - 1))) 7786 (gpc_stride - 1))) {
7549 return -EINVAL; 7787 return -EINVAL;
7788 }
7550 7789
7551 base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num; 7790 base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num;
7552 if (add_ctxsw_buffer_map_entries(map, 7791 if (add_ctxsw_buffer_map_entries(map,
7553 &g->gr.ctx_vars.ctxsw_regs.pm_ucgpc, 7792 &g->gr.ctx_vars.ctxsw_regs.pm_ucgpc,
7554 count, offset, max_cnt, base, ~0)) 7793 count, offset, max_cnt, base, ~0)) {
7555 return -EINVAL; 7794 return -EINVAL;
7795 }
7556 7796
7557 base = (g->ops.gr.get_pmm_per_chiplet_offset() * gpc_num); 7797 base = (g->ops.gr.get_pmm_per_chiplet_offset() * gpc_num);
7558 if (add_ctxsw_buffer_map_entries(map, 7798 if (add_ctxsw_buffer_map_entries(map,
7559 &g->gr.ctx_vars.ctxsw_regs.perf_gpc, 7799 &g->gr.ctx_vars.ctxsw_regs.perf_gpc,
7560 count, offset, max_cnt, base, ~0)) 7800 count, offset, max_cnt, base, ~0)) {
7561 return -EINVAL; 7801 return -EINVAL;
7802 }
7562 7803
7563 base = (NV_PERF_PMMGPCROUTER_STRIDE * gpc_num); 7804 base = (NV_PERF_PMMGPCROUTER_STRIDE * gpc_num);
7564 if (add_ctxsw_buffer_map_entries(map, 7805 if (add_ctxsw_buffer_map_entries(map,
7565 &g->gr.ctx_vars.ctxsw_regs.gpc_router, 7806 &g->gr.ctx_vars.ctxsw_regs.gpc_router,
7566 count, offset, max_cnt, base, ~0)) 7807 count, offset, max_cnt, base, ~0)) {
7567 return -EINVAL; 7808 return -EINVAL;
7809 }
7568 7810
7569 /* Counter Aggregation Unit, if available */ 7811 /* Counter Aggregation Unit, if available */
7570 if (g->gr.ctx_vars.ctxsw_regs.pm_cau.count) { 7812 if (g->gr.ctx_vars.ctxsw_regs.pm_cau.count) {
@@ -7574,8 +7816,9 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
7574 &g->gr.ctx_vars.ctxsw_regs.pm_cau, 7816 &g->gr.ctx_vars.ctxsw_regs.pm_cau,
7575 count, offset, max_cnt, base, num_tpcs, 7817 count, offset, max_cnt, base, num_tpcs,
7576 tpc_in_gpc_stride, 7818 tpc_in_gpc_stride,
7577 (tpc_in_gpc_stride - 1))) 7819 (tpc_in_gpc_stride - 1))) {
7578 return -EINVAL; 7820 return -EINVAL;
7821 }
7579 } 7822 }
7580 7823
7581 *offset = ALIGN(*offset, 256); 7824 *offset = ALIGN(*offset, 256);
@@ -7678,28 +7921,33 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
7678 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map); 7921 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map);
7679 7922
7680 map = nvgpu_big_zalloc(g, map_size); 7923 map = nvgpu_big_zalloc(g, map_size);
7681 if (!map) 7924 if (!map) {
7682 return -ENOMEM; 7925 return -ENOMEM;
7926 }
7683 7927
7684 /* Add entries from _LIST_pm_ctx_reg_SYS */ 7928 /* Add entries from _LIST_pm_ctx_reg_SYS */
7685 if (add_ctxsw_buffer_map_entries_pmsys(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys, 7929 if (add_ctxsw_buffer_map_entries_pmsys(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys,
7686 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) 7930 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) {
7687 goto cleanup; 7931 goto cleanup;
7932 }
7688 7933
7689 /* Add entries from _LIST_nv_perf_ctx_reg_SYS */ 7934 /* Add entries from _LIST_nv_perf_ctx_reg_SYS */
7690 if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys, 7935 if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys,
7691 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) 7936 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) {
7692 goto cleanup; 7937 goto cleanup;
7938 }
7693 7939
7694 /* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/ 7940 /* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/
7695 if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router, 7941 if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router,
7696 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) 7942 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) {
7697 goto cleanup; 7943 goto cleanup;
7944 }
7698 7945
7699 /* Add entries from _LIST_nv_perf_pma_ctx_reg*/ 7946 /* Add entries from _LIST_nv_perf_pma_ctx_reg*/
7700 if (g->ops.gr.add_ctxsw_reg_perf_pma(map, &g->gr.ctx_vars.ctxsw_regs.perf_pma, 7947 if (g->ops.gr.add_ctxsw_reg_perf_pma(map, &g->gr.ctx_vars.ctxsw_regs.perf_pma,
7701 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) 7948 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) {
7702 goto cleanup; 7949 goto cleanup;
7950 }
7703 7951
7704 offset = ALIGN(offset, 256); 7952 offset = ALIGN(offset, 256);
7705 7953
@@ -7710,46 +7958,52 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
7710 hwpm_ctxsw_reg_count_max, 0, 7958 hwpm_ctxsw_reg_count_max, 0,
7711 g->gr.num_fbps, 7959 g->gr.num_fbps,
7712 g->ops.gr.get_pmm_per_chiplet_offset(), 7960 g->ops.gr.get_pmm_per_chiplet_offset(),
7713 ~0)) 7961 ~0)) {
7714 goto cleanup; 7962 goto cleanup;
7963 }
7715 7964
7716 /* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */ 7965 /* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */
7717 if (add_ctxsw_buffer_map_entries_subunits(map, 7966 if (add_ctxsw_buffer_map_entries_subunits(map,
7718 &g->gr.ctx_vars.ctxsw_regs.fbp_router, 7967 &g->gr.ctx_vars.ctxsw_regs.fbp_router,
7719 &count, &offset, 7968 &count, &offset,
7720 hwpm_ctxsw_reg_count_max, 0, g->gr.num_fbps, 7969 hwpm_ctxsw_reg_count_max, 0, g->gr.num_fbps,
7721 NV_PERF_PMM_FBP_ROUTER_STRIDE, ~0)) 7970 NV_PERF_PMM_FBP_ROUTER_STRIDE, ~0)) {
7722 goto cleanup; 7971 goto cleanup;
7972 }
7723 7973
7724 /* Add entries from _LIST_nv_pm_fbpa_ctx_regs */ 7974 /* Add entries from _LIST_nv_pm_fbpa_ctx_regs */
7725 if (g->ops.gr.add_ctxsw_reg_pm_fbpa(g, map, 7975 if (g->ops.gr.add_ctxsw_reg_pm_fbpa(g, map,
7726 &g->gr.ctx_vars.ctxsw_regs.pm_fbpa, 7976 &g->gr.ctx_vars.ctxsw_regs.pm_fbpa,
7727 &count, &offset, 7977 &count, &offset,
7728 hwpm_ctxsw_reg_count_max, 0, 7978 hwpm_ctxsw_reg_count_max, 0,
7729 num_fbpas, fbpa_stride, ~0)) 7979 num_fbpas, fbpa_stride, ~0)) {
7730 goto cleanup; 7980 goto cleanup;
7981 }
7731 7982
7732 /* Add entries from _LIST_nv_pm_rop_ctx_regs */ 7983 /* Add entries from _LIST_nv_pm_rop_ctx_regs */
7733 if (add_ctxsw_buffer_map_entries(map, 7984 if (add_ctxsw_buffer_map_entries(map,
7734 &g->gr.ctx_vars.ctxsw_regs.pm_rop, 7985 &g->gr.ctx_vars.ctxsw_regs.pm_rop,
7735 &count, &offset, 7986 &count, &offset,
7736 hwpm_ctxsw_reg_count_max, 0, ~0)) 7987 hwpm_ctxsw_reg_count_max, 0, ~0)) {
7737 goto cleanup; 7988 goto cleanup;
7989 }
7738 7990
7739 /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */ 7991 /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */
7740 if (add_ctxsw_buffer_map_entries_subunits(map, 7992 if (add_ctxsw_buffer_map_entries_subunits(map,
7741 &g->gr.ctx_vars.ctxsw_regs.pm_ltc, 7993 &g->gr.ctx_vars.ctxsw_regs.pm_ltc,
7742 &count, &offset, 7994 &count, &offset,
7743 hwpm_ctxsw_reg_count_max, 0, 7995 hwpm_ctxsw_reg_count_max, 0,
7744 num_ltc, ltc_stride, ~0)) 7996 num_ltc, ltc_stride, ~0)) {
7745 goto cleanup; 7997 goto cleanup;
7998 }
7746 7999
7747 offset = ALIGN(offset, 256); 8000 offset = ALIGN(offset, 256);
7748 8001
7749 /* Add GPC entries */ 8002 /* Add GPC entries */
7750 if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset, 8003 if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset,
7751 hwpm_ctxsw_reg_count_max)) 8004 hwpm_ctxsw_reg_count_max)) {
7752 goto cleanup; 8005 goto cleanup;
8006 }
7753 8007
7754 if (offset > hwpm_ctxsw_buffer_size) { 8008 if (offset > hwpm_ctxsw_buffer_size) {
7755 nvgpu_err(g, "offset > buffer size"); 8009 nvgpu_err(g, "offset > buffer size");
@@ -7792,8 +8046,9 @@ static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g,
7792 /* Create map of pri address and pm offset if necessary */ 8046 /* Create map of pri address and pm offset if necessary */
7793 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map == NULL) { 8047 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map == NULL) {
7794 err = gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(g); 8048 err = gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(g);
7795 if (err) 8049 if (err) {
7796 return err; 8050 return err;
8051 }
7797 } 8052 }
7798 8053
7799 *priv_offset = 0; 8054 *priv_offset = 0;
@@ -7804,9 +8059,9 @@ static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g,
7804 map_key.addr = addr; 8059 map_key.addr = addr;
7805 result = bsearch(&map_key, map, count, sizeof(*map), map_cmp); 8060 result = bsearch(&map_key, map, count, sizeof(*map), map_cmp);
7806 8061
7807 if (result) 8062 if (result) {
7808 *priv_offset = result->offset; 8063 *priv_offset = result->offset;
7809 else { 8064 } else {
7810 nvgpu_err(g, "Lookup failed for address 0x%x", addr); 8065 nvgpu_err(g, "Lookup failed for address 0x%x", addr);
7811 err = -EINVAL; 8066 err = -EINVAL;
7812 } 8067 }
@@ -7827,8 +8082,9 @@ bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
7827 * valid bit must be checked to be absolutely certain that a 8082 * valid bit must be checked to be absolutely certain that a
7828 * valid context is currently resident. 8083 * valid context is currently resident.
7829 */ 8084 */
7830 if (!gr_fecs_current_ctx_valid_v(curr_gr_ctx)) 8085 if (!gr_fecs_current_ctx_valid_v(curr_gr_ctx)) {
7831 return NULL; 8086 return NULL;
8087 }
7832 8088
7833 curr_ch = gk20a_gr_get_channel_from_ctx(g, curr_gr_ctx, 8089 curr_ch = gk20a_gr_get_channel_from_ctx(g, curr_gr_ctx,
7834 &curr_gr_tsgid); 8090 &curr_gr_tsgid);
@@ -7841,14 +8097,17 @@ bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
7841 ch->tsgid, 8097 ch->tsgid,
7842 ch->chid); 8098 ch->chid);
7843 8099
7844 if (!curr_ch) 8100 if (!curr_ch) {
7845 return false; 8101 return false;
8102 }
7846 8103
7847 if (ch->chid == curr_ch->chid) 8104 if (ch->chid == curr_ch->chid) {
7848 ret = true; 8105 ret = true;
8106 }
7849 8107
7850 if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid)) 8108 if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid)) {
7851 ret = true; 8109 ret = true;
8110 }
7852 8111
7853 gk20a_channel_put(curr_ch); 8112 gk20a_channel_put(curr_ch);
7854 return ret; 8113 return ret;
@@ -7879,8 +8138,9 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7879 num_ctx_wr_ops, num_ctx_rd_ops); 8138 num_ctx_wr_ops, num_ctx_rd_ops);
7880 8139
7881 tsg = tsg_gk20a_from_ch(ch); 8140 tsg = tsg_gk20a_from_ch(ch);
7882 if (!tsg) 8141 if (!tsg) {
7883 return -EINVAL; 8142 return -EINVAL;
8143 }
7884 8144
7885 gr_ctx = &tsg->gr_ctx; 8145 gr_ctx = &tsg->gr_ctx;
7886 8146
@@ -7891,15 +8151,17 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7891 /* only do ctx ops and only on the right pass */ 8151 /* only do ctx ops and only on the right pass */
7892 if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) || 8152 if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
7893 (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) || 8153 (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
7894 ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) 8154 ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) {
7895 continue; 8155 continue;
8156 }
7896 8157
7897 /* if this is a quad access, setup for special access*/ 8158 /* if this is a quad access, setup for special access*/
7898 if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) 8159 if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD)
7899 && g->ops.gr.access_smpc_reg) 8160 && g->ops.gr.access_smpc_reg) {
7900 g->ops.gr.access_smpc_reg(g, 8161 g->ops.gr.access_smpc_reg(g,
7901 ctx_ops[i].quad, 8162 ctx_ops[i].quad,
7902 ctx_ops[i].offset); 8163 ctx_ops[i].offset);
8164 }
7903 offset = ctx_ops[i].offset; 8165 offset = ctx_ops[i].offset;
7904 8166
7905 if (pass == 0) { /* write pass */ 8167 if (pass == 0) { /* write pass */
@@ -7938,8 +8200,9 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7938 nvgpu_log(g, gpu_dbg_gpu_dbg, 8200 nvgpu_log(g, gpu_dbg_gpu_dbg,
7939 "direct rd: offset=0x%x v=0x%x", 8201 "direct rd: offset=0x%x v=0x%x",
7940 offset, ctx_ops[i].value_lo); 8202 offset, ctx_ops[i].value_lo);
7941 } else 8203 } else {
7942 ctx_ops[i].value_hi = 0; 8204 ctx_ops[i].value_hi = 0;
8205 }
7943 } 8206 }
7944 ctx_op_nr++; 8207 ctx_op_nr++;
7945 } 8208 }
@@ -7956,8 +8219,9 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7956 offset_addrs = offsets + max_offsets; 8219 offset_addrs = offsets + max_offsets;
7957 8220
7958 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); 8221 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
7959 if (err) 8222 if (err) {
7960 goto cleanup; 8223 goto cleanup;
8224 }
7961 8225
7962 g->ops.mm.l2_flush(g, true); 8226 g->ops.mm.l2_flush(g, true);
7963 8227
@@ -7973,8 +8237,9 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7973 /* only do ctx ops and only on the right pass */ 8237 /* only do ctx ops and only on the right pass */
7974 if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) || 8238 if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
7975 (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) || 8239 (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
7976 ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) 8240 ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) {
7977 continue; 8241 continue;
8242 }
7978 8243
7979 err = gr_gk20a_get_ctx_buffer_offsets(g, 8244 err = gr_gk20a_get_ctx_buffer_offsets(g,
7980 ctx_ops[i].offset, 8245 ctx_ops[i].offset,
@@ -7984,8 +8249,9 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7984 ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), 8249 ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
7985 ctx_ops[i].quad); 8250 ctx_ops[i].quad);
7986 if (!err) { 8251 if (!err) {
7987 if (!gr_ctx_ready) 8252 if (!gr_ctx_ready) {
7988 gr_ctx_ready = true; 8253 gr_ctx_ready = true;
8254 }
7989 current_mem = &gr_ctx->mem; 8255 current_mem = &gr_ctx->mem;
7990 } else { 8256 } else {
7991 err = gr_gk20a_get_pm_ctx_buffer_offsets(g, 8257 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
@@ -8016,17 +8282,19 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
8016 8282
8017 /* if this is a quad access, setup for special access*/ 8283 /* if this is a quad access, setup for special access*/
8018 if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) && 8284 if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) &&
8019 g->ops.gr.access_smpc_reg) 8285 g->ops.gr.access_smpc_reg) {
8020 g->ops.gr.access_smpc_reg(g, ctx_ops[i].quad, 8286 g->ops.gr.access_smpc_reg(g, ctx_ops[i].quad,
8021 ctx_ops[i].offset); 8287 ctx_ops[i].offset);
8288 }
8022 8289
8023 for (j = 0; j < num_offsets; j++) { 8290 for (j = 0; j < num_offsets; j++) {
8024 /* sanity check gr ctxt offsets, 8291 /* sanity check gr ctxt offsets,
8025 * don't write outside, worst case 8292 * don't write outside, worst case
8026 */ 8293 */
8027 if ((current_mem == &gr_ctx->mem) && 8294 if ((current_mem == &gr_ctx->mem) &&
8028 (offsets[j] >= g->gr.ctx_vars.golden_image_size)) 8295 (offsets[j] >= g->gr.ctx_vars.golden_image_size)) {
8029 continue; 8296 continue;
8297 }
8030 if (pass == 0) { /* write pass */ 8298 if (pass == 0) { /* write pass */
8031 v = nvgpu_mem_rd(g, current_mem, offsets[j]); 8299 v = nvgpu_mem_rd(g, current_mem, offsets[j]);
8032 v &= ~ctx_ops[i].and_n_mask_lo; 8300 v &= ~ctx_ops[i].and_n_mask_lo;
@@ -8067,8 +8335,9 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
8067 nvgpu_log(g, gpu_dbg_gpu_dbg, 8335 nvgpu_log(g, gpu_dbg_gpu_dbg,
8068 "context rd: offset=0x%x v=0x%x", 8336 "context rd: offset=0x%x v=0x%x",
8069 offsets[0] + 4, ctx_ops[i].value_hi); 8337 offsets[0] + 4, ctx_ops[i].value_hi);
8070 } else 8338 } else {
8071 ctx_ops[i].value_hi = 0; 8339 ctx_ops[i].value_hi = 0;
8340 }
8072 } 8341 }
8073 } 8342 }
8074 ctx_op_nr++; 8343 ctx_op_nr++;
@@ -8076,11 +8345,13 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
8076 } 8345 }
8077 8346
8078 cleanup: 8347 cleanup:
8079 if (offsets) 8348 if (offsets) {
8080 nvgpu_kfree(g, offsets); 8349 nvgpu_kfree(g, offsets);
8350 }
8081 8351
8082 if (gr_ctx->patch_ctx.mem.cpu_va) 8352 if (gr_ctx->patch_ctx.mem.cpu_va) {
8083 gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready); 8353 gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready);
8354 }
8084 8355
8085 return err; 8356 return err;
8086} 8357}
@@ -8382,14 +8653,16 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
8382 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); 8653 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
8383 8654
8384 ops = nvgpu_kcalloc(g, g->gr.no_of_sm, sizeof(*ops)); 8655 ops = nvgpu_kcalloc(g, g->gr.no_of_sm, sizeof(*ops));
8385 if (!ops) 8656 if (!ops) {
8386 return -ENOMEM; 8657 return -ENOMEM;
8658 }
8387 for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { 8659 for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
8388 int gpc, tpc; 8660 int gpc, tpc;
8389 u32 tpc_offset, gpc_offset, reg_offset, reg_mask, reg_val; 8661 u32 tpc_offset, gpc_offset, reg_offset, reg_mask, reg_val;
8390 8662
8391 if (!(sms & (1 << sm_id))) 8663 if (!(sms & (1 << sm_id))) {
8392 continue; 8664 continue;
8665 }
8393 8666
8394 gpc = g->gr.sm_to_cluster[sm_id].gpc_index; 8667 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
8395 tpc = g->gr.sm_to_cluster[sm_id].tpc_index; 8668 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
@@ -8422,8 +8695,9 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
8422 } 8695 }
8423 8696
8424 err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0); 8697 err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0);
8425 if (err) 8698 if (err) {
8426 nvgpu_err(g, "Failed to access register"); 8699 nvgpu_err(g, "Failed to access register");
8700 }
8427 nvgpu_kfree(g, ops); 8701 nvgpu_kfree(g, ops);
8428 return err; 8702 return err;
8429 } 8703 }
@@ -8490,15 +8764,17 @@ int gr_gk20a_suspend_contexts(struct gk20a *g,
8490 ch = g->fifo.channel + ch_data->chid; 8764 ch = g->fifo.channel + ch_data->chid;
8491 8765
8492 ctx_resident = gr_gk20a_suspend_context(ch); 8766 ctx_resident = gr_gk20a_suspend_context(ch);
8493 if (ctx_resident) 8767 if (ctx_resident) {
8494 local_ctx_resident_ch_fd = ch_data->channel_fd; 8768 local_ctx_resident_ch_fd = ch_data->channel_fd;
8769 }
8495 } 8770 }
8496 8771
8497 nvgpu_mutex_release(&dbg_s->ch_list_lock); 8772 nvgpu_mutex_release(&dbg_s->ch_list_lock);
8498 8773
8499 err = gr_gk20a_enable_ctxsw(g); 8774 err = gr_gk20a_enable_ctxsw(g);
8500 if (err) 8775 if (err) {
8501 nvgpu_err(g, "unable to restart ctxsw!"); 8776 nvgpu_err(g, "unable to restart ctxsw!");
8777 }
8502 8778
8503 *ctx_resident_ch_fd = local_ctx_resident_ch_fd; 8779 *ctx_resident_ch_fd = local_ctx_resident_ch_fd;
8504 8780
@@ -8531,13 +8807,15 @@ int gr_gk20a_resume_contexts(struct gk20a *g,
8531 ch = g->fifo.channel + ch_data->chid; 8807 ch = g->fifo.channel + ch_data->chid;
8532 8808
8533 ctx_resident = gr_gk20a_resume_context(ch); 8809 ctx_resident = gr_gk20a_resume_context(ch);
8534 if (ctx_resident) 8810 if (ctx_resident) {
8535 local_ctx_resident_ch_fd = ch_data->channel_fd; 8811 local_ctx_resident_ch_fd = ch_data->channel_fd;
8812 }
8536 } 8813 }
8537 8814
8538 err = gr_gk20a_enable_ctxsw(g); 8815 err = gr_gk20a_enable_ctxsw(g);
8539 if (err) 8816 if (err) {
8540 nvgpu_err(g, "unable to restart ctxsw!"); 8817 nvgpu_err(g, "unable to restart ctxsw!");
8818 }
8541 8819
8542 *ctx_resident_ch_fd = local_ctx_resident_ch_fd; 8820 *ctx_resident_ch_fd = local_ctx_resident_ch_fd;
8543 8821