diff options
author | Srirangan <smadhavan@nvidia.com> | 2018-08-12 03:43:36 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-09-06 05:28:48 -0400 |
commit | bc1ea8c9bfdd8614af3f282a52cbb1b74c4d9544 (patch) | |
tree | aa6e34eb6672a8d53498d8c11cbec446182fd5c7 /drivers/gpu/nvgpu/gk20a | |
parent | 6227e003920ae3bdcf8b0731fbd342158378958a (diff) |
nvgpu: gk20a: gr: Fix MISRA 15.6 violations
MISRA Rule-15.6 requires that all if-else blocks be enclosed in braces,
including single-statement blocks. Fix violations caused by
single-statement if blocks lacking braces by introducing the braces.
JIRA NVGPU-671
Change-Id: Ie4bd8bffdafe6321e35394558dc9559f9c2d05c2
Signed-off-by: Srirangan <smadhavan@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1797689
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 896 |
1 file changed, 587 insertions, 309 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 7bfc48ad..91ffbb7e 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -103,8 +103,9 @@ int gr_gk20a_get_ctx_id(struct gk20a *g, | |||
103 | struct nvgpu_mem *mem = NULL; | 103 | struct nvgpu_mem *mem = NULL; |
104 | 104 | ||
105 | tsg = tsg_gk20a_from_ch(c); | 105 | tsg = tsg_gk20a_from_ch(c); |
106 | if (!tsg) | 106 | if (!tsg) { |
107 | return -EINVAL; | 107 | return -EINVAL; |
108 | } | ||
108 | 109 | ||
109 | gr_ctx = &tsg->gr_ctx; | 110 | gr_ctx = &tsg->gr_ctx; |
110 | mem = &gr_ctx->mem; | 111 | mem = &gr_ctx->mem; |
@@ -387,8 +388,9 @@ int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long duration_ms, | |||
387 | u32 delay = expect_delay; | 388 | u32 delay = expect_delay; |
388 | struct nvgpu_timeout timeout; | 389 | struct nvgpu_timeout timeout; |
389 | 390 | ||
390 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | 391 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { |
391 | return 0; | 392 | return 0; |
393 | } | ||
392 | 394 | ||
393 | nvgpu_log_fn(g, " "); | 395 | nvgpu_log_fn(g, " "); |
394 | 396 | ||
@@ -424,41 +426,49 @@ int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, | |||
424 | 426 | ||
425 | nvgpu_log_fn(g, " "); | 427 | nvgpu_log_fn(g, " "); |
426 | 428 | ||
427 | if (sleepduringwait) | 429 | if (sleepduringwait) { |
428 | delay = GR_IDLE_CHECK_DEFAULT; | 430 | delay = GR_IDLE_CHECK_DEFAULT; |
431 | } | ||
429 | 432 | ||
430 | nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), | 433 | nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), |
431 | NVGPU_TIMER_CPU_TIMER); | 434 | NVGPU_TIMER_CPU_TIMER); |
432 | 435 | ||
433 | while (check == WAIT_UCODE_LOOP) { | 436 | while (check == WAIT_UCODE_LOOP) { |
434 | if (nvgpu_timeout_expired(&timeout)) | 437 | if (nvgpu_timeout_expired(&timeout)) { |
435 | check = WAIT_UCODE_TIMEOUT; | 438 | check = WAIT_UCODE_TIMEOUT; |
439 | } | ||
436 | 440 | ||
437 | reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id)); | 441 | reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id)); |
438 | 442 | ||
439 | if (mailbox_ret) | 443 | if (mailbox_ret) { |
440 | *mailbox_ret = reg; | 444 | *mailbox_ret = reg; |
445 | } | ||
441 | 446 | ||
442 | switch (opc_success) { | 447 | switch (opc_success) { |
443 | case GR_IS_UCODE_OP_EQUAL: | 448 | case GR_IS_UCODE_OP_EQUAL: |
444 | if (reg == mailbox_ok) | 449 | if (reg == mailbox_ok) { |
445 | check = WAIT_UCODE_OK; | 450 | check = WAIT_UCODE_OK; |
451 | } | ||
446 | break; | 452 | break; |
447 | case GR_IS_UCODE_OP_NOT_EQUAL: | 453 | case GR_IS_UCODE_OP_NOT_EQUAL: |
448 | if (reg != mailbox_ok) | 454 | if (reg != mailbox_ok) { |
449 | check = WAIT_UCODE_OK; | 455 | check = WAIT_UCODE_OK; |
456 | } | ||
450 | break; | 457 | break; |
451 | case GR_IS_UCODE_OP_AND: | 458 | case GR_IS_UCODE_OP_AND: |
452 | if (reg & mailbox_ok) | 459 | if (reg & mailbox_ok) { |
453 | check = WAIT_UCODE_OK; | 460 | check = WAIT_UCODE_OK; |
461 | } | ||
454 | break; | 462 | break; |
455 | case GR_IS_UCODE_OP_LESSER: | 463 | case GR_IS_UCODE_OP_LESSER: |
456 | if (reg < mailbox_ok) | 464 | if (reg < mailbox_ok) { |
457 | check = WAIT_UCODE_OK; | 465 | check = WAIT_UCODE_OK; |
466 | } | ||
458 | break; | 467 | break; |
459 | case GR_IS_UCODE_OP_LESSER_EQUAL: | 468 | case GR_IS_UCODE_OP_LESSER_EQUAL: |
460 | if (reg <= mailbox_ok) | 469 | if (reg <= mailbox_ok) { |
461 | check = WAIT_UCODE_OK; | 470 | check = WAIT_UCODE_OK; |
471 | } | ||
462 | break; | 472 | break; |
463 | case GR_IS_UCODE_OP_SKIP: | 473 | case GR_IS_UCODE_OP_SKIP: |
464 | /* do no success check */ | 474 | /* do no success check */ |
@@ -473,24 +483,29 @@ int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, | |||
473 | 483 | ||
474 | switch (opc_fail) { | 484 | switch (opc_fail) { |
475 | case GR_IS_UCODE_OP_EQUAL: | 485 | case GR_IS_UCODE_OP_EQUAL: |
476 | if (reg == mailbox_fail) | 486 | if (reg == mailbox_fail) { |
477 | check = WAIT_UCODE_ERROR; | 487 | check = WAIT_UCODE_ERROR; |
488 | } | ||
478 | break; | 489 | break; |
479 | case GR_IS_UCODE_OP_NOT_EQUAL: | 490 | case GR_IS_UCODE_OP_NOT_EQUAL: |
480 | if (reg != mailbox_fail) | 491 | if (reg != mailbox_fail) { |
481 | check = WAIT_UCODE_ERROR; | 492 | check = WAIT_UCODE_ERROR; |
493 | } | ||
482 | break; | 494 | break; |
483 | case GR_IS_UCODE_OP_AND: | 495 | case GR_IS_UCODE_OP_AND: |
484 | if (reg & mailbox_fail) | 496 | if (reg & mailbox_fail) { |
485 | check = WAIT_UCODE_ERROR; | 497 | check = WAIT_UCODE_ERROR; |
498 | } | ||
486 | break; | 499 | break; |
487 | case GR_IS_UCODE_OP_LESSER: | 500 | case GR_IS_UCODE_OP_LESSER: |
488 | if (reg < mailbox_fail) | 501 | if (reg < mailbox_fail) { |
489 | check = WAIT_UCODE_ERROR; | 502 | check = WAIT_UCODE_ERROR; |
503 | } | ||
490 | break; | 504 | break; |
491 | case GR_IS_UCODE_OP_LESSER_EQUAL: | 505 | case GR_IS_UCODE_OP_LESSER_EQUAL: |
492 | if (reg <= mailbox_fail) | 506 | if (reg <= mailbox_fail) { |
493 | check = WAIT_UCODE_ERROR; | 507 | check = WAIT_UCODE_ERROR; |
508 | } | ||
494 | break; | 509 | break; |
495 | case GR_IS_UCODE_OP_SKIP: | 510 | case GR_IS_UCODE_OP_SKIP: |
496 | /* do no check on fail*/ | 511 | /* do no check on fail*/ |
@@ -505,8 +520,9 @@ int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, | |||
505 | if (sleepduringwait) { | 520 | if (sleepduringwait) { |
506 | nvgpu_usleep_range(delay, delay * 2); | 521 | nvgpu_usleep_range(delay, delay * 2); |
507 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | 522 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); |
508 | } else | 523 | } else { |
509 | nvgpu_udelay(delay); | 524 | nvgpu_udelay(delay); |
525 | } | ||
510 | } | 526 | } |
511 | 527 | ||
512 | if (check == WAIT_UCODE_TIMEOUT) { | 528 | if (check == WAIT_UCODE_TIMEOUT) { |
@@ -539,9 +555,10 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, | |||
539 | 555 | ||
540 | nvgpu_mutex_acquire(&gr->fecs_mutex); | 556 | nvgpu_mutex_acquire(&gr->fecs_mutex); |
541 | 557 | ||
542 | if (op.mailbox.id != 0) | 558 | if (op.mailbox.id != 0) { |
543 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id), | 559 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id), |
544 | op.mailbox.data); | 560 | op.mailbox.data); |
561 | } | ||
545 | 562 | ||
546 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), | 563 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), |
547 | gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr)); | 564 | gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr)); |
@@ -552,16 +569,18 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, | |||
552 | 569 | ||
553 | /* op.mailbox.id == 4 cases require waiting for completion on | 570 | /* op.mailbox.id == 4 cases require waiting for completion on |
554 | * for op.mailbox.id == 0 */ | 571 | * for op.mailbox.id == 0 */ |
555 | if (op.mailbox.id == 4) | 572 | if (op.mailbox.id == 4) { |
556 | op.mailbox.id = 0; | 573 | op.mailbox.id = 0; |
574 | } | ||
557 | 575 | ||
558 | ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret, | 576 | ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret, |
559 | op.cond.ok, op.mailbox.ok, | 577 | op.cond.ok, op.mailbox.ok, |
560 | op.cond.fail, op.mailbox.fail, | 578 | op.cond.fail, op.mailbox.fail, |
561 | sleepduringwait); | 579 | sleepduringwait); |
562 | if (ret) | 580 | if (ret) { |
563 | nvgpu_err(g,"fecs method: data=0x%08x push adr=0x%08x", | 581 | nvgpu_err(g,"fecs method: data=0x%08x push adr=0x%08x", |
564 | op.method.data, op.method.addr); | 582 | op.method.data, op.method.addr); |
583 | } | ||
565 | 584 | ||
566 | nvgpu_mutex_release(&gr->fecs_mutex); | 585 | nvgpu_mutex_release(&gr->fecs_mutex); |
567 | 586 | ||
@@ -588,9 +607,10 @@ int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g, | |||
588 | op.cond.ok, op.mailbox.ok, | 607 | op.cond.ok, op.mailbox.ok, |
589 | op.cond.fail, op.mailbox.fail, | 608 | op.cond.fail, op.mailbox.fail, |
590 | false); | 609 | false); |
591 | if (ret) | 610 | if (ret) { |
592 | nvgpu_err(g,"fecs method: data=0x%08x push adr=0x%08x", | 611 | nvgpu_err(g,"fecs method: data=0x%08x push adr=0x%08x", |
593 | op.method.data, op.method.addr); | 612 | op.method.data, op.method.addr); |
613 | } | ||
594 | 614 | ||
595 | nvgpu_mutex_release(&gr->fecs_mutex); | 615 | nvgpu_mutex_release(&gr->fecs_mutex); |
596 | 616 | ||
@@ -620,9 +640,10 @@ int gr_gk20a_disable_ctxsw(struct gk20a *g) | |||
620 | 640 | ||
621 | nvgpu_mutex_acquire(&g->ctxsw_disable_lock); | 641 | nvgpu_mutex_acquire(&g->ctxsw_disable_lock); |
622 | g->ctxsw_disable_count++; | 642 | g->ctxsw_disable_count++; |
623 | if (g->ctxsw_disable_count == 1) | 643 | if (g->ctxsw_disable_count == 1) { |
624 | err = gr_gk20a_ctrl_ctxsw(g, | 644 | err = gr_gk20a_ctrl_ctxsw(g, |
625 | gr_fecs_method_push_adr_stop_ctxsw_v(), NULL); | 645 | gr_fecs_method_push_adr_stop_ctxsw_v(), NULL); |
646 | } | ||
626 | nvgpu_mutex_release(&g->ctxsw_disable_lock); | 647 | nvgpu_mutex_release(&g->ctxsw_disable_lock); |
627 | 648 | ||
628 | return err; | 649 | return err; |
@@ -638,9 +659,10 @@ int gr_gk20a_enable_ctxsw(struct gk20a *g) | |||
638 | nvgpu_mutex_acquire(&g->ctxsw_disable_lock); | 659 | nvgpu_mutex_acquire(&g->ctxsw_disable_lock); |
639 | g->ctxsw_disable_count--; | 660 | g->ctxsw_disable_count--; |
640 | WARN_ON(g->ctxsw_disable_count < 0); | 661 | WARN_ON(g->ctxsw_disable_count < 0); |
641 | if (g->ctxsw_disable_count == 0) | 662 | if (g->ctxsw_disable_count == 0) { |
642 | err = gr_gk20a_ctrl_ctxsw(g, | 663 | err = gr_gk20a_ctrl_ctxsw(g, |
643 | gr_fecs_method_push_adr_start_ctxsw_v(), NULL); | 664 | gr_fecs_method_push_adr_start_ctxsw_v(), NULL); |
665 | } | ||
644 | nvgpu_mutex_release(&g->ctxsw_disable_lock); | 666 | nvgpu_mutex_release(&g->ctxsw_disable_lock); |
645 | 667 | ||
646 | return err; | 668 | return err; |
@@ -779,9 +801,10 @@ int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, | |||
779 | .fail = 0x20, }, | 801 | .fail = 0x20, }, |
780 | .cond.ok = GR_IS_UCODE_OP_AND, | 802 | .cond.ok = GR_IS_UCODE_OP_AND, |
781 | .cond.fail = GR_IS_UCODE_OP_AND}, true); | 803 | .cond.fail = GR_IS_UCODE_OP_AND}, true); |
782 | if (ret) | 804 | if (ret) { |
783 | nvgpu_err(g, | 805 | nvgpu_err(g, |
784 | "bind channel instance failed"); | 806 | "bind channel instance failed"); |
807 | } | ||
785 | 808 | ||
786 | return ret; | 809 | return ret; |
787 | } | 810 | } |
@@ -815,8 +838,9 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) | |||
815 | nvgpu_log_fn(g, " "); | 838 | nvgpu_log_fn(g, " "); |
816 | 839 | ||
817 | tsg = tsg_gk20a_from_ch(c); | 840 | tsg = tsg_gk20a_from_ch(c); |
818 | if (!tsg) | 841 | if (!tsg) { |
819 | return -EINVAL; | 842 | return -EINVAL; |
843 | } | ||
820 | 844 | ||
821 | gr_ctx = &tsg->gr_ctx; | 845 | gr_ctx = &tsg->gr_ctx; |
822 | mem = &gr_ctx->mem; | 846 | mem = &gr_ctx->mem; |
@@ -843,11 +867,12 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) | |||
843 | ctxsw_prog_main_image_zcull_o(), | 867 | ctxsw_prog_main_image_zcull_o(), |
844 | gr_ctx->zcull_ctx.ctx_sw_mode); | 868 | gr_ctx->zcull_ctx.ctx_sw_mode); |
845 | 869 | ||
846 | if (ctxheader->gpu_va) | 870 | if (ctxheader->gpu_va) { |
847 | g->ops.gr.write_zcull_ptr(g, ctxheader, | 871 | g->ops.gr.write_zcull_ptr(g, ctxheader, |
848 | gr_ctx->zcull_ctx.gpu_va); | 872 | gr_ctx->zcull_ctx.gpu_va); |
849 | else | 873 | } else { |
850 | g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va); | 874 | g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va); |
875 | } | ||
851 | 876 | ||
852 | gk20a_enable_channel_tsg(g, c); | 877 | gk20a_enable_channel_tsg(g, c); |
853 | 878 | ||
@@ -883,15 +908,17 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, | |||
883 | nvgpu_log_fn(g, " "); | 908 | nvgpu_log_fn(g, " "); |
884 | 909 | ||
885 | tsg = tsg_gk20a_from_ch(c); | 910 | tsg = tsg_gk20a_from_ch(c); |
886 | if (!tsg) | 911 | if (!tsg) { |
887 | return -EINVAL; | 912 | return -EINVAL; |
913 | } | ||
888 | 914 | ||
889 | gr_ctx = &tsg->gr_ctx; | 915 | gr_ctx = &tsg->gr_ctx; |
890 | if (patch) { | 916 | if (patch) { |
891 | int err; | 917 | int err; |
892 | err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); | 918 | err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); |
893 | if (err) | 919 | if (err) { |
894 | return err; | 920 | return err; |
921 | } | ||
895 | } | 922 | } |
896 | 923 | ||
897 | /* global pagepool buffer */ | 924 | /* global pagepool buffer */ |
@@ -903,8 +930,9 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, | |||
903 | size = gr->global_ctx_buffer[PAGEPOOL].mem.size / | 930 | size = gr->global_ctx_buffer[PAGEPOOL].mem.size / |
904 | gr_scc_pagepool_total_pages_byte_granularity_v(); | 931 | gr_scc_pagepool_total_pages_byte_granularity_v(); |
905 | 932 | ||
906 | if (size == g->ops.gr.pagepool_default_size(g)) | 933 | if (size == g->ops.gr.pagepool_default_size(g)) { |
907 | size = gr_scc_pagepool_total_pages_hwmax_v(); | 934 | size = gr_scc_pagepool_total_pages_hwmax_v(); |
935 | } | ||
908 | 936 | ||
909 | nvgpu_log_info(g, "pagepool buffer addr : 0x%016llx, size : %d", | 937 | nvgpu_log_info(g, "pagepool buffer addr : 0x%016llx, size : %d", |
910 | addr, size); | 938 | addr, size); |
@@ -934,8 +962,9 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, | |||
934 | g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, patch); | 962 | g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, patch); |
935 | g->ops.gr.commit_global_cb_manager(g, c, patch); | 963 | g->ops.gr.commit_global_cb_manager(g, c, patch); |
936 | 964 | ||
937 | if (patch) | 965 | if (patch) { |
938 | gr_gk20a_ctx_patch_write_end(g, gr_ctx, false); | 966 | gr_gk20a_ctx_patch_write_end(g, gr_ctx, false); |
967 | } | ||
939 | 968 | ||
940 | return 0; | 969 | return 0; |
941 | } | 970 | } |
@@ -996,8 +1025,9 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) | |||
996 | */ | 1025 | */ |
997 | static u32 gr_gk20a_get_map_tile_count(struct gr_gk20a *gr, u32 index) | 1026 | static u32 gr_gk20a_get_map_tile_count(struct gr_gk20a *gr, u32 index) |
998 | { | 1027 | { |
999 | if (index >= gr->map_tile_count) | 1028 | if (index >= gr->map_tile_count) { |
1000 | return 0; | 1029 | return 0; |
1030 | } | ||
1001 | 1031 | ||
1002 | return gr->map_tiles[index]; | 1032 | return gr->map_tiles[index]; |
1003 | } | 1033 | } |
@@ -1008,8 +1038,9 @@ int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) | |||
1008 | u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod; | 1038 | u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod; |
1009 | u32 map0, map1, map2, map3, map4, map5; | 1039 | u32 map0, map1, map2, map3, map4, map5; |
1010 | 1040 | ||
1011 | if (!gr->map_tiles) | 1041 | if (!gr->map_tiles) { |
1012 | return -1; | 1042 | return -1; |
1043 | } | ||
1013 | 1044 | ||
1014 | nvgpu_log_fn(g, " "); | 1045 | nvgpu_log_fn(g, " "); |
1015 | 1046 | ||
@@ -1179,8 +1210,9 @@ int gr_gk20a_init_sm_id_table(struct gk20a *g) | |||
1179 | */ | 1210 | */ |
1180 | u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index) | 1211 | u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index) |
1181 | { | 1212 | { |
1182 | if (gpc_index >= gr->gpc_count) | 1213 | if (gpc_index >= gr->gpc_count) { |
1183 | return 0; | 1214 | return 0; |
1215 | } | ||
1184 | 1216 | ||
1185 | return gr->gpc_tpc_count[gpc_index]; | 1217 | return gr->gpc_tpc_count[gpc_index]; |
1186 | } | 1218 | } |
@@ -1199,12 +1231,14 @@ int gr_gk20a_init_fs_state(struct gk20a *g) | |||
1199 | 1231 | ||
1200 | if (g->ops.gr.init_sm_id_table) { | 1232 | if (g->ops.gr.init_sm_id_table) { |
1201 | err = g->ops.gr.init_sm_id_table(g); | 1233 | err = g->ops.gr.init_sm_id_table(g); |
1202 | if (err) | 1234 | if (err) { |
1203 | return err; | 1235 | return err; |
1236 | } | ||
1204 | 1237 | ||
1205 | /* Is table empty ? */ | 1238 | /* Is table empty ? */ |
1206 | if (g->gr.no_of_sm == 0) | 1239 | if (g->gr.no_of_sm == 0) { |
1207 | return -EINVAL; | 1240 | return -EINVAL; |
1241 | } | ||
1208 | } | 1242 | } |
1209 | 1243 | ||
1210 | for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { | 1244 | for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { |
@@ -1213,8 +1247,9 @@ int gr_gk20a_init_fs_state(struct gk20a *g) | |||
1213 | 1247 | ||
1214 | g->ops.gr.program_sm_id_numbering(g, gpc_index, tpc_index, sm_id); | 1248 | g->ops.gr.program_sm_id_numbering(g, gpc_index, tpc_index, sm_id); |
1215 | 1249 | ||
1216 | if (g->ops.gr.program_active_tpc_counts) | 1250 | if (g->ops.gr.program_active_tpc_counts) { |
1217 | g->ops.gr.program_active_tpc_counts(g, gpc_index); | 1251 | g->ops.gr.program_active_tpc_counts(g, gpc_index); |
1252 | } | ||
1218 | } | 1253 | } |
1219 | 1254 | ||
1220 | for (reg_index = 0, gpc_id = 0; | 1255 | for (reg_index = 0, gpc_id = 0; |
@@ -1237,8 +1272,9 @@ int gr_gk20a_init_fs_state(struct gk20a *g) | |||
1237 | 1272 | ||
1238 | /* gr__setup_pd_mapping stubbed for gk20a */ | 1273 | /* gr__setup_pd_mapping stubbed for gk20a */ |
1239 | g->ops.gr.setup_rop_mapping(g, gr); | 1274 | g->ops.gr.setup_rop_mapping(g, gr); |
1240 | if (g->ops.gr.setup_alpha_beta_tables) | 1275 | if (g->ops.gr.setup_alpha_beta_tables) { |
1241 | g->ops.gr.setup_alpha_beta_tables(g, gr); | 1276 | g->ops.gr.setup_alpha_beta_tables(g, gr); |
1277 | } | ||
1242 | 1278 | ||
1243 | for (gpc_index = 0; | 1279 | for (gpc_index = 0; |
1244 | gpc_index < gr_pd_dist_skip_table__size_1_v() * 4; | 1280 | gpc_index < gr_pd_dist_skip_table__size_1_v() * 4; |
@@ -1291,8 +1327,9 @@ int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type) | |||
1291 | .cond.fail = GR_IS_UCODE_OP_AND, | 1327 | .cond.fail = GR_IS_UCODE_OP_AND, |
1292 | }, true); | 1328 | }, true); |
1293 | 1329 | ||
1294 | if (ret) | 1330 | if (ret) { |
1295 | nvgpu_err(g, "save context image failed"); | 1331 | nvgpu_err(g, "save context image failed"); |
1332 | } | ||
1296 | 1333 | ||
1297 | return ret; | 1334 | return ret; |
1298 | } | 1335 | } |
@@ -1327,26 +1364,30 @@ u32 gk20a_init_sw_bundle(struct gk20a *g) | |||
1327 | err = gr_gk20a_wait_idle(g, | 1364 | err = gr_gk20a_wait_idle(g, |
1328 | gk20a_get_gr_idle_timeout(g), | 1365 | gk20a_get_gr_idle_timeout(g), |
1329 | GR_IDLE_CHECK_DEFAULT); | 1366 | GR_IDLE_CHECK_DEFAULT); |
1330 | if (err) | 1367 | if (err) { |
1331 | goto error; | 1368 | goto error; |
1369 | } | ||
1332 | } | 1370 | } |
1333 | 1371 | ||
1334 | err = gr_gk20a_wait_fe_idle(g, gk20a_get_gr_idle_timeout(g), | 1372 | err = gr_gk20a_wait_fe_idle(g, gk20a_get_gr_idle_timeout(g), |
1335 | GR_IDLE_CHECK_DEFAULT); | 1373 | GR_IDLE_CHECK_DEFAULT); |
1336 | if (err) | 1374 | if (err) { |
1337 | goto error; | 1375 | goto error; |
1376 | } | ||
1338 | } | 1377 | } |
1339 | 1378 | ||
1340 | if (!err && g->ops.gr.init_sw_veid_bundle) { | 1379 | if (!err && g->ops.gr.init_sw_veid_bundle) { |
1341 | err = g->ops.gr.init_sw_veid_bundle(g); | 1380 | err = g->ops.gr.init_sw_veid_bundle(g); |
1342 | if (err) | 1381 | if (err) { |
1343 | goto error; | 1382 | goto error; |
1383 | } | ||
1344 | } | 1384 | } |
1345 | 1385 | ||
1346 | if (g->ops.gr.init_sw_bundle64) { | 1386 | if (g->ops.gr.init_sw_bundle64) { |
1347 | err = g->ops.gr.init_sw_bundle64(g); | 1387 | err = g->ops.gr.init_sw_bundle64(g); |
1348 | if (err) | 1388 | if (err) { |
1349 | goto error; | 1389 | goto error; |
1390 | } | ||
1350 | } | 1391 | } |
1351 | 1392 | ||
1352 | /* disable pipe mode override */ | 1393 | /* disable pipe mode override */ |
@@ -1396,8 +1437,9 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1396 | nvgpu_log_fn(g, " "); | 1437 | nvgpu_log_fn(g, " "); |
1397 | 1438 | ||
1398 | tsg = tsg_gk20a_from_ch(c); | 1439 | tsg = tsg_gk20a_from_ch(c); |
1399 | if (!tsg) | 1440 | if (!tsg) { |
1400 | return -EINVAL; | 1441 | return -EINVAL; |
1442 | } | ||
1401 | 1443 | ||
1402 | gr_ctx = &tsg->gr_ctx; | 1444 | gr_ctx = &tsg->gr_ctx; |
1403 | gr_mem = &gr_ctx->mem; | 1445 | gr_mem = &gr_ctx->mem; |
@@ -1421,8 +1463,9 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1421 | gr_fe_pwr_mode_req_send_f() | gr_fe_pwr_mode_mode_force_on_f()); | 1463 | gr_fe_pwr_mode_req_send_f() | gr_fe_pwr_mode_mode_force_on_f()); |
1422 | do { | 1464 | do { |
1423 | u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r())); | 1465 | u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r())); |
1424 | if (req == gr_fe_pwr_mode_req_done_v()) | 1466 | if (req == gr_fe_pwr_mode_req_done_v()) { |
1425 | break; | 1467 | break; |
1468 | } | ||
1426 | nvgpu_udelay(FE_PWR_MODE_TIMEOUT_DEFAULT); | 1469 | nvgpu_udelay(FE_PWR_MODE_TIMEOUT_DEFAULT); |
1427 | } while (!nvgpu_timeout_expired_msg(&timeout, | 1470 | } while (!nvgpu_timeout_expired_msg(&timeout, |
1428 | "timeout forcing FE on")); | 1471 | "timeout forcing FE on")); |
@@ -1467,8 +1510,9 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1467 | 1510 | ||
1468 | do { | 1511 | do { |
1469 | u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r())); | 1512 | u32 req = gr_fe_pwr_mode_req_v(gk20a_readl(g, gr_fe_pwr_mode_r())); |
1470 | if (req == gr_fe_pwr_mode_req_done_v()) | 1513 | if (req == gr_fe_pwr_mode_req_done_v()) { |
1471 | break; | 1514 | break; |
1515 | } | ||
1472 | nvgpu_udelay(FE_PWR_MODE_TIMEOUT_DEFAULT); | 1516 | nvgpu_udelay(FE_PWR_MODE_TIMEOUT_DEFAULT); |
1473 | } while (!nvgpu_timeout_expired_msg(&timeout, | 1517 | } while (!nvgpu_timeout_expired_msg(&timeout, |
1474 | "timeout setting FE power to auto")); | 1518 | "timeout setting FE power to auto")); |
@@ -1479,8 +1523,9 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1479 | gr_scc_init_ram_trigger_f()); | 1523 | gr_scc_init_ram_trigger_f()); |
1480 | 1524 | ||
1481 | err = gr_gk20a_fecs_ctx_bind_channel(g, c); | 1525 | err = gr_gk20a_fecs_ctx_bind_channel(g, c); |
1482 | if (err) | 1526 | if (err) { |
1483 | goto clean_up; | 1527 | goto clean_up; |
1528 | } | ||
1484 | 1529 | ||
1485 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), | 1530 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), |
1486 | GR_IDLE_CHECK_DEFAULT); | 1531 | GR_IDLE_CHECK_DEFAULT); |
@@ -1491,41 +1536,48 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1491 | sw_ctx_load->l[i].value); | 1536 | sw_ctx_load->l[i].value); |
1492 | } | 1537 | } |
1493 | 1538 | ||
1494 | if (g->ops.gr.init_preemption_state) | 1539 | if (g->ops.gr.init_preemption_state) { |
1495 | g->ops.gr.init_preemption_state(g); | 1540 | g->ops.gr.init_preemption_state(g); |
1541 | } | ||
1496 | 1542 | ||
1497 | if (g->ops.clock_gating.blcg_gr_load_gating_prod) | 1543 | if (g->ops.clock_gating.blcg_gr_load_gating_prod) { |
1498 | g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled); | 1544 | g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled); |
1545 | } | ||
1499 | 1546 | ||
1500 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), | 1547 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), |
1501 | GR_IDLE_CHECK_DEFAULT); | 1548 | GR_IDLE_CHECK_DEFAULT); |
1502 | if (err) | 1549 | if (err) { |
1503 | goto clean_up; | 1550 | goto clean_up; |
1551 | } | ||
1504 | 1552 | ||
1505 | /* disable fe_go_idle */ | 1553 | /* disable fe_go_idle */ |
1506 | gk20a_writel(g, gr_fe_go_idle_timeout_r(), | 1554 | gk20a_writel(g, gr_fe_go_idle_timeout_r(), |
1507 | gr_fe_go_idle_timeout_count_disabled_f()); | 1555 | gr_fe_go_idle_timeout_count_disabled_f()); |
1508 | 1556 | ||
1509 | err = g->ops.gr.commit_global_ctx_buffers(g, c, false); | 1557 | err = g->ops.gr.commit_global_ctx_buffers(g, c, false); |
1510 | if (err) | 1558 | if (err) { |
1511 | goto clean_up; | 1559 | goto clean_up; |
1560 | } | ||
1512 | 1561 | ||
1513 | /* override a few ctx state registers */ | 1562 | /* override a few ctx state registers */ |
1514 | g->ops.gr.commit_global_timeslice(g, c); | 1563 | g->ops.gr.commit_global_timeslice(g, c); |
1515 | 1564 | ||
1516 | /* floorsweep anything left */ | 1565 | /* floorsweep anything left */ |
1517 | err = g->ops.gr.init_fs_state(g); | 1566 | err = g->ops.gr.init_fs_state(g); |
1518 | if (err) | 1567 | if (err) { |
1519 | goto clean_up; | 1568 | goto clean_up; |
1569 | } | ||
1520 | 1570 | ||
1521 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), | 1571 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), |
1522 | GR_IDLE_CHECK_DEFAULT); | 1572 | GR_IDLE_CHECK_DEFAULT); |
1523 | if (err) | 1573 | if (err) { |
1524 | goto restore_fe_go_idle; | 1574 | goto restore_fe_go_idle; |
1575 | } | ||
1525 | 1576 | ||
1526 | err = gk20a_init_sw_bundle(g); | 1577 | err = gk20a_init_sw_bundle(g); |
1527 | if (err) | 1578 | if (err) { |
1528 | goto clean_up; | 1579 | goto clean_up; |
1580 | } | ||
1529 | 1581 | ||
1530 | restore_fe_go_idle: | 1582 | restore_fe_go_idle: |
1531 | /* restore fe_go_idle */ | 1583 | /* restore fe_go_idle */ |
@@ -1533,8 +1585,9 @@ restore_fe_go_idle: | |||
1533 | gr_fe_go_idle_timeout_count_prod_f()); | 1585 | gr_fe_go_idle_timeout_count_prod_f()); |
1534 | 1586 | ||
1535 | if (err || gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), | 1587 | if (err || gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), |
1536 | GR_IDLE_CHECK_DEFAULT)) | 1588 | GR_IDLE_CHECK_DEFAULT)) { |
1537 | goto clean_up; | 1589 | goto clean_up; |
1590 | } | ||
1538 | 1591 | ||
1539 | /* load method init */ | 1592 | /* load method init */ |
1540 | if (sw_method_init->count) { | 1593 | if (sw_method_init->count) { |
@@ -1558,8 +1611,9 @@ restore_fe_go_idle: | |||
1558 | 1611 | ||
1559 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), | 1612 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), |
1560 | GR_IDLE_CHECK_DEFAULT); | 1613 | GR_IDLE_CHECK_DEFAULT); |
1561 | if (err) | 1614 | if (err) { |
1562 | goto clean_up; | 1615 | goto clean_up; |
1616 | } | ||
1563 | 1617 | ||
1564 | ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); | 1618 | ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); |
1565 | ctx_header_words >>= 2; | 1619 | ctx_header_words >>= 2; |
@@ -1576,8 +1630,9 @@ restore_fe_go_idle: | |||
1576 | g->ops.gr.write_zcull_ptr(g, gold_mem, 0); | 1630 | g->ops.gr.write_zcull_ptr(g, gold_mem, 0); |
1577 | 1631 | ||
1578 | err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); | 1632 | err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); |
1579 | if (err) | 1633 | if (err) { |
1580 | goto clean_up; | 1634 | goto clean_up; |
1635 | } | ||
1581 | 1636 | ||
1582 | gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v()); | 1637 | gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v()); |
1583 | 1638 | ||
@@ -1599,8 +1654,9 @@ restore_fe_go_idle: | |||
1599 | } | 1654 | } |
1600 | 1655 | ||
1601 | err = g->ops.gr.commit_inst(c, gr_mem->gpu_va); | 1656 | err = g->ops.gr.commit_inst(c, gr_mem->gpu_va); |
1602 | if (err) | 1657 | if (err) { |
1603 | goto clean_up; | 1658 | goto clean_up; |
1659 | } | ||
1604 | 1660 | ||
1605 | gr->ctx_vars.golden_image_initialized = true; | 1661 | gr->ctx_vars.golden_image_initialized = true; |
1606 | 1662 | ||
@@ -1608,10 +1664,11 @@ restore_fe_go_idle: | |||
1608 | gr_fecs_current_ctx_valid_false_f()); | 1664 | gr_fecs_current_ctx_valid_false_f()); |
1609 | 1665 | ||
1610 | clean_up: | 1666 | clean_up: |
1611 | if (err) | 1667 | if (err) { |
1612 | nvgpu_err(g, "fail"); | 1668 | nvgpu_err(g, "fail"); |
1613 | else | 1669 | } else { |
1614 | nvgpu_log_fn(g, "done"); | 1670 | nvgpu_log_fn(g, "done"); |
1671 | } | ||
1615 | 1672 | ||
1616 | nvgpu_mutex_release(&gr->ctx_mutex); | 1673 | nvgpu_mutex_release(&gr->ctx_mutex); |
1617 | return err; | 1674 | return err; |
@@ -1630,8 +1687,9 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1630 | nvgpu_log_fn(g, " "); | 1687 | nvgpu_log_fn(g, " "); |
1631 | 1688 | ||
1632 | tsg = tsg_gk20a_from_ch(c); | 1689 | tsg = tsg_gk20a_from_ch(c); |
1633 | if (!tsg) | 1690 | if (!tsg) { |
1634 | return -EINVAL; | 1691 | return -EINVAL; |
1692 | } | ||
1635 | 1693 | ||
1636 | gr_ctx = &tsg->gr_ctx; | 1694 | gr_ctx = &tsg->gr_ctx; |
1637 | mem = &gr_ctx->mem; | 1695 | mem = &gr_ctx->mem; |
@@ -1689,8 +1747,9 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1689 | nvgpu_log_fn(g, " "); | 1747 | nvgpu_log_fn(g, " "); |
1690 | 1748 | ||
1691 | tsg = tsg_gk20a_from_ch(c); | 1749 | tsg = tsg_gk20a_from_ch(c); |
1692 | if (!tsg) | 1750 | if (!tsg) { |
1693 | return -EINVAL; | 1751 | return -EINVAL; |
1752 | } | ||
1694 | 1753 | ||
1695 | gr_ctx = &tsg->gr_ctx; | 1754 | gr_ctx = &tsg->gr_ctx; |
1696 | pm_ctx = &gr_ctx->pm_ctx; | 1755 | pm_ctx = &gr_ctx->pm_ctx; |
@@ -1800,10 +1859,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1800 | 1859 | ||
1801 | nvgpu_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data); | 1860 | nvgpu_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data); |
1802 | 1861 | ||
1803 | if (ctxheader->gpu_va) | 1862 | if (ctxheader->gpu_va) { |
1804 | g->ops.gr.write_pm_ptr(g, ctxheader, virt_addr); | 1863 | g->ops.gr.write_pm_ptr(g, ctxheader, virt_addr); |
1805 | else | 1864 | } else { |
1806 | g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr); | 1865 | g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr); |
1866 | } | ||
1807 | 1867 | ||
1808 | /* enable channel */ | 1868 | /* enable channel */ |
1809 | gk20a_enable_channel_tsg(g, c); | 1869 | gk20a_enable_channel_tsg(g, c); |
@@ -1837,13 +1897,15 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1837 | nvgpu_log_fn(g, " "); | 1897 | nvgpu_log_fn(g, " "); |
1838 | 1898 | ||
1839 | tsg = tsg_gk20a_from_ch(c); | 1899 | tsg = tsg_gk20a_from_ch(c); |
1840 | if (!tsg) | 1900 | if (!tsg) { |
1841 | return -EINVAL; | 1901 | return -EINVAL; |
1902 | } | ||
1842 | 1903 | ||
1843 | gr_ctx = &tsg->gr_ctx; | 1904 | gr_ctx = &tsg->gr_ctx; |
1844 | mem = &gr_ctx->mem; | 1905 | mem = &gr_ctx->mem; |
1845 | if (gr->ctx_vars.local_golden_image == NULL) | 1906 | if (gr->ctx_vars.local_golden_image == NULL) { |
1846 | return -EINVAL; | 1907 | return -EINVAL; |
1908 | } | ||
1847 | 1909 | ||
1848 | /* Channel gr_ctx buffer is gpu cacheable. | 1910 | /* Channel gr_ctx buffer is gpu cacheable. |
1849 | Flush and invalidate before cpu update. */ | 1911 | Flush and invalidate before cpu update. */ |
@@ -1853,11 +1915,13 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1853 | gr->ctx_vars.local_golden_image, | 1915 | gr->ctx_vars.local_golden_image, |
1854 | gr->ctx_vars.golden_image_size); | 1916 | gr->ctx_vars.golden_image_size); |
1855 | 1917 | ||
1856 | if (g->ops.gr.init_ctxsw_hdr_data) | 1918 | if (g->ops.gr.init_ctxsw_hdr_data) { |
1857 | g->ops.gr.init_ctxsw_hdr_data(g, mem); | 1919 | g->ops.gr.init_ctxsw_hdr_data(g, mem); |
1920 | } | ||
1858 | 1921 | ||
1859 | if (g->ops.gr.enable_cde_in_fecs && c->cde) | 1922 | if (g->ops.gr.enable_cde_in_fecs && c->cde) { |
1860 | g->ops.gr.enable_cde_in_fecs(g, mem); | 1923 | g->ops.gr.enable_cde_in_fecs(g, mem); |
1924 | } | ||
1861 | 1925 | ||
1862 | /* set priv access map */ | 1926 | /* set priv access map */ |
1863 | virt_addr_lo = | 1927 | virt_addr_lo = |
@@ -1865,10 +1929,11 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1865 | virt_addr_hi = | 1929 | virt_addr_hi = |
1866 | u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); | 1930 | u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); |
1867 | 1931 | ||
1868 | if (g->allow_all) | 1932 | if (g->allow_all) { |
1869 | data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(); | 1933 | data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(); |
1870 | else | 1934 | } else { |
1871 | data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(); | 1935 | data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(); |
1936 | } | ||
1872 | 1937 | ||
1873 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(), | 1938 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(), |
1874 | data); | 1939 | data); |
@@ -1886,11 +1951,13 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1886 | v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); | 1951 | v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); |
1887 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v); | 1952 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v); |
1888 | 1953 | ||
1889 | if (g->ops.gr.update_ctxsw_preemption_mode) | 1954 | if (g->ops.gr.update_ctxsw_preemption_mode) { |
1890 | g->ops.gr.update_ctxsw_preemption_mode(g, c, mem); | 1955 | g->ops.gr.update_ctxsw_preemption_mode(g, c, mem); |
1956 | } | ||
1891 | 1957 | ||
1892 | if (g->ops.gr.update_boosted_ctx) | 1958 | if (g->ops.gr.update_boosted_ctx) { |
1893 | g->ops.gr.update_boosted_ctx(g, mem, gr_ctx); | 1959 | g->ops.gr.update_boosted_ctx(g, mem, gr_ctx); |
1960 | } | ||
1894 | 1961 | ||
1895 | virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va); | 1962 | virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va); |
1896 | virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va); | 1963 | virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va); |
@@ -1919,8 +1986,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1919 | } | 1986 | } |
1920 | 1987 | ||
1921 | virt_addr = gr_ctx->pm_ctx.mem.gpu_va; | 1988 | virt_addr = gr_ctx->pm_ctx.mem.gpu_va; |
1922 | } else | 1989 | } else { |
1923 | virt_addr = 0; | 1990 | virt_addr = 0; |
1991 | } | ||
1924 | 1992 | ||
1925 | data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o()); | 1993 | data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o()); |
1926 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); | 1994 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); |
@@ -1957,8 +2025,9 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) | |||
1957 | int err; | 2025 | int err; |
1958 | 2026 | ||
1959 | err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc); | 2027 | err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc); |
1960 | if (err) | 2028 | if (err) { |
1961 | return err; | 2029 | return err; |
2030 | } | ||
1962 | 2031 | ||
1963 | g->ops.mm.init_inst_block(&ucode_info->inst_blk_desc, vm, 0); | 2032 | g->ops.mm.init_inst_block(&ucode_info->inst_blk_desc, vm, 0); |
1964 | 2033 | ||
@@ -2070,8 +2139,9 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) | |||
2070 | g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32)); | 2139 | g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32)); |
2071 | 2140 | ||
2072 | err = nvgpu_dma_alloc_sys(g, ucode_size, &ucode_info->surface_desc); | 2141 | err = nvgpu_dma_alloc_sys(g, ucode_size, &ucode_info->surface_desc); |
2073 | if (err) | 2142 | if (err) { |
2074 | goto clean_up; | 2143 | goto clean_up; |
2144 | } | ||
2075 | 2145 | ||
2076 | gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc, | 2146 | gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc, |
2077 | &ucode_info->fecs, | 2147 | &ucode_info->fecs, |
@@ -2092,15 +2162,17 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) | |||
2092 | gpccs_fw = NULL; | 2162 | gpccs_fw = NULL; |
2093 | 2163 | ||
2094 | err = gr_gk20a_init_ctxsw_ucode_vaspace(g); | 2164 | err = gr_gk20a_init_ctxsw_ucode_vaspace(g); |
2095 | if (err) | 2165 | if (err) { |
2096 | goto clean_up; | 2166 | goto clean_up; |
2167 | } | ||
2097 | 2168 | ||
2098 | return 0; | 2169 | return 0; |
2099 | 2170 | ||
2100 | clean_up: | 2171 | clean_up: |
2101 | if (ucode_info->surface_desc.gpu_va) | 2172 | if (ucode_info->surface_desc.gpu_va) { |
2102 | nvgpu_gmmu_unmap(vm, &ucode_info->surface_desc, | 2173 | nvgpu_gmmu_unmap(vm, &ucode_info->surface_desc, |
2103 | ucode_info->surface_desc.gpu_va); | 2174 | ucode_info->surface_desc.gpu_va); |
2175 | } | ||
2104 | nvgpu_dma_free(g, &ucode_info->surface_desc); | 2176 | nvgpu_dma_free(g, &ucode_info->surface_desc); |
2105 | 2177 | ||
2106 | nvgpu_release_firmware(g, gpccs_fw); | 2178 | nvgpu_release_firmware(g, gpccs_fw); |
@@ -2123,9 +2195,10 @@ static void gr_gk20a_wait_for_fecs_arb_idle(struct gk20a *g) | |||
2123 | val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()); | 2195 | val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()); |
2124 | } | 2196 | } |
2125 | 2197 | ||
2126 | if (!retries) | 2198 | if (!retries) { |
2127 | nvgpu_err(g, "arbiter cmd timeout, fecs arb ctx cmd: 0x%08x", | 2199 | nvgpu_err(g, "arbiter cmd timeout, fecs arb ctx cmd: 0x%08x", |
2128 | gk20a_readl(g, gr_fecs_arb_ctx_cmd_r())); | 2200 | gk20a_readl(g, gr_fecs_arb_ctx_cmd_r())); |
2201 | } | ||
2129 | 2202 | ||
2130 | retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT; | 2203 | retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT; |
2131 | while ((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) & | 2204 | while ((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) & |
@@ -2358,8 +2431,9 @@ int gr_gk20a_load_ctxsw_ucode(struct gk20a *g) | |||
2358 | if (!g->gr.skip_ucode_init) { | 2431 | if (!g->gr.skip_ucode_init) { |
2359 | err = gr_gk20a_init_ctxsw_ucode(g); | 2432 | err = gr_gk20a_init_ctxsw_ucode(g); |
2360 | 2433 | ||
2361 | if (err) | 2434 | if (err) { |
2362 | return err; | 2435 | return err; |
2436 | } | ||
2363 | } | 2437 | } |
2364 | gr_gk20a_load_falcon_with_bootloader(g); | 2438 | gr_gk20a_load_falcon_with_bootloader(g); |
2365 | g->gr.skip_ucode_init = true; | 2439 | g->gr.skip_ucode_init = true; |
@@ -2384,9 +2458,10 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g) | |||
2384 | } | 2458 | } |
2385 | 2459 | ||
2386 | if (nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP) || | 2460 | if (nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP) || |
2387 | nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) | 2461 | nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { |
2388 | gk20a_writel(g, gr_fecs_current_ctx_r(), | 2462 | gk20a_writel(g, gr_fecs_current_ctx_r(), |
2389 | gr_fecs_current_ctx_valid_false_f()); | 2463 | gr_fecs_current_ctx_valid_false_f()); |
2464 | } | ||
2390 | 2465 | ||
2391 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff); | 2466 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff); |
2392 | gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff); | 2467 | gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff); |
@@ -2452,8 +2527,9 @@ int gr_gk20a_init_ctx_state(struct gk20a *g) | |||
2452 | void gk20a_gr_destroy_ctx_buffer(struct gk20a *g, | 2527 | void gk20a_gr_destroy_ctx_buffer(struct gk20a *g, |
2453 | struct gr_ctx_buffer_desc *desc) | 2528 | struct gr_ctx_buffer_desc *desc) |
2454 | { | 2529 | { |
2455 | if (!desc) | 2530 | if (!desc) { |
2456 | return; | 2531 | return; |
2532 | } | ||
2457 | nvgpu_dma_free(g, &desc->mem); | 2533 | nvgpu_dma_free(g, &desc->mem); |
2458 | desc->destroy = NULL; | 2534 | desc->destroy = NULL; |
2459 | } | 2535 | } |
@@ -2466,12 +2542,14 @@ int gk20a_gr_alloc_ctx_buffer(struct gk20a *g, | |||
2466 | 2542 | ||
2467 | nvgpu_log_fn(g, " "); | 2543 | nvgpu_log_fn(g, " "); |
2468 | 2544 | ||
2469 | if (nvgpu_mem_is_valid(&desc->mem)) | 2545 | if (nvgpu_mem_is_valid(&desc->mem)) { |
2470 | return 0; | 2546 | return 0; |
2547 | } | ||
2471 | 2548 | ||
2472 | err = nvgpu_dma_alloc_sys(g, size, &desc->mem); | 2549 | err = nvgpu_dma_alloc_sys(g, size, &desc->mem); |
2473 | if (err) | 2550 | if (err) { |
2474 | return err; | 2551 | return err; |
2552 | } | ||
2475 | 2553 | ||
2476 | desc->destroy = gk20a_gr_destroy_ctx_buffer; | 2554 | desc->destroy = gk20a_gr_destroy_ctx_buffer; |
2477 | 2555 | ||
@@ -2513,45 +2591,51 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g) | |||
2513 | 2591 | ||
2514 | err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[CIRCULAR], | 2592 | err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[CIRCULAR], |
2515 | cb_buffer_size); | 2593 | cb_buffer_size); |
2516 | if (err) | 2594 | if (err) { |
2517 | goto clean_up; | 2595 | goto clean_up; |
2596 | } | ||
2518 | 2597 | ||
2519 | if (g->ops.secure_alloc) { | 2598 | if (g->ops.secure_alloc) { |
2520 | err = g->ops.secure_alloc(g, | 2599 | err = g->ops.secure_alloc(g, |
2521 | &gr->global_ctx_buffer[CIRCULAR_VPR], | 2600 | &gr->global_ctx_buffer[CIRCULAR_VPR], |
2522 | cb_buffer_size); | 2601 | cb_buffer_size); |
2523 | if (err) | 2602 | if (err) { |
2524 | goto clean_up; | 2603 | goto clean_up; |
2604 | } | ||
2525 | } | 2605 | } |
2526 | 2606 | ||
2527 | nvgpu_log_info(g, "pagepool_buffer_size : %d", pagepool_buffer_size); | 2607 | nvgpu_log_info(g, "pagepool_buffer_size : %d", pagepool_buffer_size); |
2528 | 2608 | ||
2529 | err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[PAGEPOOL], | 2609 | err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[PAGEPOOL], |
2530 | pagepool_buffer_size); | 2610 | pagepool_buffer_size); |
2531 | if (err) | 2611 | if (err) { |
2532 | goto clean_up; | 2612 | goto clean_up; |
2613 | } | ||
2533 | 2614 | ||
2534 | if (g->ops.secure_alloc) { | 2615 | if (g->ops.secure_alloc) { |
2535 | err = g->ops.secure_alloc(g, | 2616 | err = g->ops.secure_alloc(g, |
2536 | &gr->global_ctx_buffer[PAGEPOOL_VPR], | 2617 | &gr->global_ctx_buffer[PAGEPOOL_VPR], |
2537 | pagepool_buffer_size); | 2618 | pagepool_buffer_size); |
2538 | if (err) | 2619 | if (err) { |
2539 | goto clean_up; | 2620 | goto clean_up; |
2621 | } | ||
2540 | } | 2622 | } |
2541 | 2623 | ||
2542 | nvgpu_log_info(g, "attr_buffer_size : %d", attr_buffer_size); | 2624 | nvgpu_log_info(g, "attr_buffer_size : %d", attr_buffer_size); |
2543 | 2625 | ||
2544 | err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[ATTRIBUTE], | 2626 | err = gk20a_gr_alloc_ctx_buffer(g, &gr->global_ctx_buffer[ATTRIBUTE], |
2545 | attr_buffer_size); | 2627 | attr_buffer_size); |
2546 | if (err) | 2628 | if (err) { |
2547 | goto clean_up; | 2629 | goto clean_up; |
2630 | } | ||
2548 | 2631 | ||
2549 | if (g->ops.secure_alloc) { | 2632 | if (g->ops.secure_alloc) { |
2550 | err = g->ops.secure_alloc(g, | 2633 | err = g->ops.secure_alloc(g, |
2551 | &gr->global_ctx_buffer[ATTRIBUTE_VPR], | 2634 | &gr->global_ctx_buffer[ATTRIBUTE_VPR], |
2552 | attr_buffer_size); | 2635 | attr_buffer_size); |
2553 | if (err) | 2636 | if (err) { |
2554 | goto clean_up; | 2637 | goto clean_up; |
2638 | } | ||
2555 | } | 2639 | } |
2556 | 2640 | ||
2557 | nvgpu_log_info(g, "golden_image_size : %d", | 2641 | nvgpu_log_info(g, "golden_image_size : %d", |
@@ -2560,8 +2644,9 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g) | |||
2560 | err = gk20a_gr_alloc_ctx_buffer(g, | 2644 | err = gk20a_gr_alloc_ctx_buffer(g, |
2561 | &gr->global_ctx_buffer[GOLDEN_CTX], | 2645 | &gr->global_ctx_buffer[GOLDEN_CTX], |
2562 | gr->ctx_vars.golden_image_size); | 2646 | gr->ctx_vars.golden_image_size); |
2563 | if (err) | 2647 | if (err) { |
2564 | goto clean_up; | 2648 | goto clean_up; |
2649 | } | ||
2565 | 2650 | ||
2566 | nvgpu_log_info(g, "priv_access_map_size : %d", | 2651 | nvgpu_log_info(g, "priv_access_map_size : %d", |
2567 | gr->ctx_vars.priv_access_map_size); | 2652 | gr->ctx_vars.priv_access_map_size); |
@@ -2570,8 +2655,9 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g) | |||
2570 | &gr->global_ctx_buffer[PRIV_ACCESS_MAP], | 2655 | &gr->global_ctx_buffer[PRIV_ACCESS_MAP], |
2571 | gr->ctx_vars.priv_access_map_size); | 2656 | gr->ctx_vars.priv_access_map_size); |
2572 | 2657 | ||
2573 | if (err) | 2658 | if (err) { |
2574 | goto clean_up; | 2659 | goto clean_up; |
2660 | } | ||
2575 | 2661 | ||
2576 | #ifdef CONFIG_GK20A_CTXSW_TRACE | 2662 | #ifdef CONFIG_GK20A_CTXSW_TRACE |
2577 | nvgpu_log_info(g, "fecs_trace_buffer_size : %d", | 2663 | nvgpu_log_info(g, "fecs_trace_buffer_size : %d", |
@@ -2644,8 +2730,9 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | |||
2644 | nvgpu_log_fn(g, " "); | 2730 | nvgpu_log_fn(g, " "); |
2645 | 2731 | ||
2646 | tsg = tsg_gk20a_from_ch(c); | 2732 | tsg = tsg_gk20a_from_ch(c); |
2647 | if (!tsg) | 2733 | if (!tsg) { |
2648 | return -EINVAL; | 2734 | return -EINVAL; |
2735 | } | ||
2649 | 2736 | ||
2650 | g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va; | 2737 | g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va; |
2651 | g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size; | 2738 | g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size; |
@@ -2664,8 +2751,9 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | |||
2664 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, | 2751 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, |
2665 | NVGPU_VM_MAP_CACHEABLE, | 2752 | NVGPU_VM_MAP_CACHEABLE, |
2666 | gk20a_mem_flag_none, true, mem->aperture); | 2753 | gk20a_mem_flag_none, true, mem->aperture); |
2667 | if (!gpu_va) | 2754 | if (!gpu_va) { |
2668 | goto clean_up; | 2755 | goto clean_up; |
2756 | } | ||
2669 | g_bfr_va[CIRCULAR_VA] = gpu_va; | 2757 | g_bfr_va[CIRCULAR_VA] = gpu_va; |
2670 | g_bfr_size[CIRCULAR_VA] = mem->size; | 2758 | g_bfr_size[CIRCULAR_VA] = mem->size; |
2671 | 2759 | ||
@@ -2682,8 +2770,9 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | |||
2682 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, | 2770 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, |
2683 | NVGPU_VM_MAP_CACHEABLE, | 2771 | NVGPU_VM_MAP_CACHEABLE, |
2684 | gk20a_mem_flag_none, false, mem->aperture); | 2772 | gk20a_mem_flag_none, false, mem->aperture); |
2685 | if (!gpu_va) | 2773 | if (!gpu_va) { |
2686 | goto clean_up; | 2774 | goto clean_up; |
2775 | } | ||
2687 | g_bfr_va[ATTRIBUTE_VA] = gpu_va; | 2776 | g_bfr_va[ATTRIBUTE_VA] = gpu_va; |
2688 | g_bfr_size[ATTRIBUTE_VA] = mem->size; | 2777 | g_bfr_size[ATTRIBUTE_VA] = mem->size; |
2689 | 2778 | ||
@@ -2700,8 +2789,9 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | |||
2700 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, | 2789 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, |
2701 | NVGPU_VM_MAP_CACHEABLE, | 2790 | NVGPU_VM_MAP_CACHEABLE, |
2702 | gk20a_mem_flag_none, true, mem->aperture); | 2791 | gk20a_mem_flag_none, true, mem->aperture); |
2703 | if (!gpu_va) | 2792 | if (!gpu_va) { |
2704 | goto clean_up; | 2793 | goto clean_up; |
2794 | } | ||
2705 | g_bfr_va[PAGEPOOL_VA] = gpu_va; | 2795 | g_bfr_va[PAGEPOOL_VA] = gpu_va; |
2706 | g_bfr_size[PAGEPOOL_VA] = mem->size; | 2796 | g_bfr_size[PAGEPOOL_VA] = mem->size; |
2707 | 2797 | ||
@@ -2709,8 +2799,9 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | |||
2709 | mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; | 2799 | mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; |
2710 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0, | 2800 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0, |
2711 | gk20a_mem_flag_none, true, mem->aperture); | 2801 | gk20a_mem_flag_none, true, mem->aperture); |
2712 | if (!gpu_va) | 2802 | if (!gpu_va) { |
2713 | goto clean_up; | 2803 | goto clean_up; |
2804 | } | ||
2714 | g_bfr_va[GOLDEN_CTX_VA] = gpu_va; | 2805 | g_bfr_va[GOLDEN_CTX_VA] = gpu_va; |
2715 | g_bfr_size[GOLDEN_CTX_VA] = mem->size; | 2806 | g_bfr_size[GOLDEN_CTX_VA] = mem->size; |
2716 | g_bfr_index[GOLDEN_CTX_VA] = GOLDEN_CTX; | 2807 | g_bfr_index[GOLDEN_CTX_VA] = GOLDEN_CTX; |
@@ -2719,8 +2810,9 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | |||
2719 | mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem; | 2810 | mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem; |
2720 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0, | 2811 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0, |
2721 | gk20a_mem_flag_none, true, mem->aperture); | 2812 | gk20a_mem_flag_none, true, mem->aperture); |
2722 | if (!gpu_va) | 2813 | if (!gpu_va) { |
2723 | goto clean_up; | 2814 | goto clean_up; |
2815 | } | ||
2724 | g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va; | 2816 | g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va; |
2725 | g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size; | 2817 | g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size; |
2726 | g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; | 2818 | g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; |
@@ -2759,16 +2851,18 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | |||
2759 | 2851 | ||
2760 | nvgpu_log_fn(g, " "); | 2852 | nvgpu_log_fn(g, " "); |
2761 | 2853 | ||
2762 | if (gr->ctx_vars.buffer_size == 0) | 2854 | if (gr->ctx_vars.buffer_size == 0) { |
2763 | return 0; | 2855 | return 0; |
2856 | } | ||
2764 | 2857 | ||
2765 | /* alloc channel gr ctx buffer */ | 2858 | /* alloc channel gr ctx buffer */ |
2766 | gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; | 2859 | gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; |
2767 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; | 2860 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; |
2768 | 2861 | ||
2769 | err = nvgpu_dma_alloc(g, gr->ctx_vars.buffer_total_size, &gr_ctx->mem); | 2862 | err = nvgpu_dma_alloc(g, gr->ctx_vars.buffer_total_size, &gr_ctx->mem); |
2770 | if (err) | 2863 | if (err) { |
2771 | return err; | 2864 | return err; |
2865 | } | ||
2772 | 2866 | ||
2773 | gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm, | 2867 | gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm, |
2774 | &gr_ctx->mem, | 2868 | &gr_ctx->mem, |
@@ -2776,8 +2870,9 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | |||
2776 | 0, /* not GPU-cacheable */ | 2870 | 0, /* not GPU-cacheable */ |
2777 | gk20a_mem_flag_none, true, | 2871 | gk20a_mem_flag_none, true, |
2778 | gr_ctx->mem.aperture); | 2872 | gr_ctx->mem.aperture); |
2779 | if (!gr_ctx->mem.gpu_va) | 2873 | if (!gr_ctx->mem.gpu_va) { |
2780 | goto err_free_mem; | 2874 | goto err_free_mem; |
2875 | } | ||
2781 | 2876 | ||
2782 | return 0; | 2877 | return 0; |
2783 | 2878 | ||
@@ -2799,8 +2894,9 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, | |||
2799 | } | 2894 | } |
2800 | 2895 | ||
2801 | err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, tsg->vm, class, padding); | 2896 | err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, tsg->vm, class, padding); |
2802 | if (err) | 2897 | if (err) { |
2803 | return err; | 2898 | return err; |
2899 | } | ||
2804 | 2900 | ||
2805 | gr_ctx->tsgid = tsg->tsgid; | 2901 | gr_ctx->tsgid = tsg->tsgid; |
2806 | 2902 | ||
@@ -2818,8 +2914,9 @@ void gr_gk20a_free_gr_ctx(struct gk20a *g, | |||
2818 | gr_gk20a_free_channel_pm_ctx(g, vm, gr_ctx); | 2914 | gr_gk20a_free_channel_pm_ctx(g, vm, gr_ctx); |
2819 | 2915 | ||
2820 | if (g->ops.gr.dump_ctxsw_stats && | 2916 | if (g->ops.gr.dump_ctxsw_stats && |
2821 | g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) | 2917 | g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) { |
2822 | g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx); | 2918 | g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx); |
2919 | } | ||
2823 | 2920 | ||
2824 | nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); | 2921 | nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); |
2825 | nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); | 2922 | nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); |
@@ -2859,8 +2956,9 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, | |||
2859 | nvgpu_log_fn(g, " "); | 2956 | nvgpu_log_fn(g, " "); |
2860 | 2957 | ||
2861 | tsg = tsg_gk20a_from_ch(c); | 2958 | tsg = tsg_gk20a_from_ch(c); |
2862 | if (!tsg) | 2959 | if (!tsg) { |
2863 | return -EINVAL; | 2960 | return -EINVAL; |
2961 | } | ||
2864 | 2962 | ||
2865 | patch_ctx = &tsg->gr_ctx.patch_ctx; | 2963 | patch_ctx = &tsg->gr_ctx.patch_ctx; |
2866 | alloc_size = g->ops.gr.get_patch_slots(g) * | 2964 | alloc_size = g->ops.gr.get_patch_slots(g) * |
@@ -2871,8 +2969,9 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, | |||
2871 | 2969 | ||
2872 | err = nvgpu_dma_alloc_map_sys(ch_vm, | 2970 | err = nvgpu_dma_alloc_map_sys(ch_vm, |
2873 | alloc_size * sizeof(u32), &patch_ctx->mem); | 2971 | alloc_size * sizeof(u32), &patch_ctx->mem); |
2874 | if (err) | 2972 | if (err) { |
2875 | return err; | 2973 | return err; |
2974 | } | ||
2876 | 2975 | ||
2877 | nvgpu_log_fn(g, "done"); | 2976 | nvgpu_log_fn(g, "done"); |
2878 | return 0; | 2977 | return 0; |
@@ -2886,9 +2985,10 @@ static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g, | |||
2886 | 2985 | ||
2887 | nvgpu_log_fn(g, " "); | 2986 | nvgpu_log_fn(g, " "); |
2888 | 2987 | ||
2889 | if (patch_ctx->mem.gpu_va) | 2988 | if (patch_ctx->mem.gpu_va) { |
2890 | nvgpu_gmmu_unmap(vm, &patch_ctx->mem, | 2989 | nvgpu_gmmu_unmap(vm, &patch_ctx->mem, |
2891 | patch_ctx->mem.gpu_va); | 2990 | patch_ctx->mem.gpu_va); |
2991 | } | ||
2892 | 2992 | ||
2893 | nvgpu_dma_free(g, &patch_ctx->mem); | 2993 | nvgpu_dma_free(g, &patch_ctx->mem); |
2894 | patch_ctx->data_count = 0; | 2994 | patch_ctx->data_count = 0; |
@@ -2935,8 +3035,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | |||
2935 | } | 3035 | } |
2936 | c->obj_class = class_num; | 3036 | c->obj_class = class_num; |
2937 | 3037 | ||
2938 | if (!gk20a_is_channel_marked_as_tsg(c)) | 3038 | if (!gk20a_is_channel_marked_as_tsg(c)) { |
2939 | return -EINVAL; | 3039 | return -EINVAL; |
3040 | } | ||
2940 | 3041 | ||
2941 | tsg = &f->tsg[c->tsgid]; | 3042 | tsg = &f->tsg[c->tsgid]; |
2942 | gr_ctx = &tsg->gr_ctx; | 3043 | gr_ctx = &tsg->gr_ctx; |
@@ -3007,8 +3108,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | |||
3007 | } | 3108 | } |
3008 | #endif | 3109 | #endif |
3009 | 3110 | ||
3010 | if (g->ops.gr.set_czf_bypass) | 3111 | if (g->ops.gr.set_czf_bypass) { |
3011 | g->ops.gr.set_czf_bypass(g, c); | 3112 | g->ops.gr.set_czf_bypass(g, c); |
3113 | } | ||
3012 | 3114 | ||
3013 | /* PM ctxt switch is off by default */ | 3115 | /* PM ctxt switch is off by default */ |
3014 | gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | 3116 | gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); |
@@ -3112,8 +3214,9 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) | |||
3112 | nvgpu_vfree(g, gr->ctx_vars.local_golden_image); | 3214 | nvgpu_vfree(g, gr->ctx_vars.local_golden_image); |
3113 | gr->ctx_vars.local_golden_image = NULL; | 3215 | gr->ctx_vars.local_golden_image = NULL; |
3114 | 3216 | ||
3115 | if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) | 3217 | if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) { |
3116 | nvgpu_big_free(g, gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); | 3218 | nvgpu_big_free(g, gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); |
3219 | } | ||
3117 | gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; | 3220 | gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; |
3118 | 3221 | ||
3119 | gk20a_comptag_allocator_destroy(g, &gr->comp_tags); | 3222 | gk20a_comptag_allocator_destroy(g, &gr->comp_tags); |
@@ -3146,8 +3249,9 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3146 | if (gr->fbp_rop_l2_en_mask == NULL) { | 3249 | if (gr->fbp_rop_l2_en_mask == NULL) { |
3147 | gr->fbp_rop_l2_en_mask = | 3250 | gr->fbp_rop_l2_en_mask = |
3148 | nvgpu_kzalloc(g, gr->max_fbps_count * sizeof(u32)); | 3251 | nvgpu_kzalloc(g, gr->max_fbps_count * sizeof(u32)); |
3149 | if (!gr->fbp_rop_l2_en_mask) | 3252 | if (!gr->fbp_rop_l2_en_mask) { |
3150 | goto clean_up; | 3253 | goto clean_up; |
3254 | } | ||
3151 | } else { | 3255 | } else { |
3152 | memset(gr->fbp_rop_l2_en_mask, 0, gr->max_fbps_count * | 3256 | memset(gr->fbp_rop_l2_en_mask, 0, gr->max_fbps_count * |
3153 | sizeof(u32)); | 3257 | sizeof(u32)); |
@@ -3166,8 +3270,9 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3166 | 3270 | ||
3167 | gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); | 3271 | gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); |
3168 | if (WARN(gr->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC, | 3272 | if (WARN(gr->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC, |
3169 | "too many pes per gpc\n")) | 3273 | "too many pes per gpc\n")) { |
3170 | goto clean_up; | 3274 | goto clean_up; |
3275 | } | ||
3171 | 3276 | ||
3172 | gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS); | 3277 | gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS); |
3173 | 3278 | ||
@@ -3176,45 +3281,51 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3176 | goto clean_up; | 3281 | goto clean_up; |
3177 | } | 3282 | } |
3178 | 3283 | ||
3179 | if (gr->gpc_tpc_count == NULL) | 3284 | if (gr->gpc_tpc_count == NULL) { |
3180 | gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count * | 3285 | gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count * |
3181 | sizeof(u32)); | 3286 | sizeof(u32)); |
3182 | else | 3287 | } else { |
3183 | memset(gr->gpc_tpc_count, 0, gr->gpc_count * | 3288 | memset(gr->gpc_tpc_count, 0, gr->gpc_count * |
3184 | sizeof(u32)); | 3289 | sizeof(u32)); |
3290 | } | ||
3185 | 3291 | ||
3186 | if (gr->gpc_tpc_mask == NULL) | 3292 | if (gr->gpc_tpc_mask == NULL) { |
3187 | gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->max_gpc_count * | 3293 | gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->max_gpc_count * |
3188 | sizeof(u32)); | 3294 | sizeof(u32)); |
3189 | else | 3295 | } else { |
3190 | memset(gr->gpc_tpc_mask, 0, gr->max_gpc_count * | 3296 | memset(gr->gpc_tpc_mask, 0, gr->max_gpc_count * |
3191 | sizeof(u32)); | 3297 | sizeof(u32)); |
3298 | } | ||
3192 | 3299 | ||
3193 | if (gr->gpc_zcb_count == NULL) | 3300 | if (gr->gpc_zcb_count == NULL) { |
3194 | gr->gpc_zcb_count = nvgpu_kzalloc(g, gr->gpc_count * | 3301 | gr->gpc_zcb_count = nvgpu_kzalloc(g, gr->gpc_count * |
3195 | sizeof(u32)); | 3302 | sizeof(u32)); |
3196 | else | 3303 | } else { |
3197 | memset(gr->gpc_zcb_count, 0, gr->gpc_count * | 3304 | memset(gr->gpc_zcb_count, 0, gr->gpc_count * |
3198 | sizeof(u32)); | 3305 | sizeof(u32)); |
3306 | } | ||
3199 | 3307 | ||
3200 | if (gr->gpc_ppc_count == NULL) | 3308 | if (gr->gpc_ppc_count == NULL) { |
3201 | gr->gpc_ppc_count = nvgpu_kzalloc(g, gr->gpc_count * | 3309 | gr->gpc_ppc_count = nvgpu_kzalloc(g, gr->gpc_count * |
3202 | sizeof(u32)); | 3310 | sizeof(u32)); |
3203 | else | 3311 | } else { |
3204 | memset(gr->gpc_ppc_count, 0, gr->gpc_count * | 3312 | memset(gr->gpc_ppc_count, 0, gr->gpc_count * |
3205 | sizeof(u32)); | 3313 | sizeof(u32)); |
3314 | } | ||
3206 | 3315 | ||
3207 | if (gr->gpc_skip_mask == NULL) | 3316 | if (gr->gpc_skip_mask == NULL) { |
3208 | gr->gpc_skip_mask = | 3317 | gr->gpc_skip_mask = |
3209 | nvgpu_kzalloc(g, gr_pd_dist_skip_table__size_1_v() * | 3318 | nvgpu_kzalloc(g, gr_pd_dist_skip_table__size_1_v() * |
3210 | 4 * sizeof(u32)); | 3319 | 4 * sizeof(u32)); |
3211 | else | 3320 | } else { |
3212 | memset(gr->gpc_skip_mask, 0, gr_pd_dist_skip_table__size_1_v() * | 3321 | memset(gr->gpc_skip_mask, 0, gr_pd_dist_skip_table__size_1_v() * |
3213 | 4 * sizeof(u32)); | 3322 | 4 * sizeof(u32)); |
3323 | } | ||
3214 | 3324 | ||
3215 | if (!gr->gpc_tpc_count || !gr->gpc_tpc_mask || !gr->gpc_zcb_count || | 3325 | if (!gr->gpc_tpc_count || !gr->gpc_tpc_mask || !gr->gpc_zcb_count || |
3216 | !gr->gpc_ppc_count || !gr->gpc_skip_mask) | 3326 | !gr->gpc_ppc_count || !gr->gpc_skip_mask) { |
3217 | goto clean_up; | 3327 | goto clean_up; |
3328 | } | ||
3218 | 3329 | ||
3219 | for (gpc_index = 0; gpc_index < gr->max_gpc_count; gpc_index++) { | 3330 | for (gpc_index = 0; gpc_index < gr->max_gpc_count; gpc_index++) { |
3220 | if (g->ops.gr.get_gpc_tpc_mask) { | 3331 | if (g->ops.gr.get_gpc_tpc_mask) { |
@@ -3247,8 +3358,9 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3247 | nvgpu_kzalloc(g, gr->gpc_count * | 3358 | nvgpu_kzalloc(g, gr->gpc_count * |
3248 | sizeof(u32)); | 3359 | sizeof(u32)); |
3249 | if (!gr->pes_tpc_count[pes_index] || | 3360 | if (!gr->pes_tpc_count[pes_index] || |
3250 | !gr->pes_tpc_mask[pes_index]) | 3361 | !gr->pes_tpc_mask[pes_index]) { |
3251 | goto clean_up; | 3362 | goto clean_up; |
3363 | } | ||
3252 | } | 3364 | } |
3253 | 3365 | ||
3254 | tmp = gk20a_readl(g, | 3366 | tmp = gk20a_readl(g, |
@@ -3261,8 +3373,9 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3261 | /* detect PES presence by seeing if there are | 3373 | /* detect PES presence by seeing if there are |
3262 | * TPCs connected to it. | 3374 | * TPCs connected to it. |
3263 | */ | 3375 | */ |
3264 | if (pes_tpc_count != 0) | 3376 | if (pes_tpc_count != 0) { |
3265 | gr->gpc_ppc_count[gpc_index]++; | 3377 | gr->gpc_ppc_count[gpc_index]++; |
3378 | } | ||
3266 | 3379 | ||
3267 | gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count; | 3380 | gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count; |
3268 | gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask; | 3381 | gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask; |
@@ -3301,14 +3414,15 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3301 | } | 3414 | } |
3302 | 3415 | ||
3303 | /* allocate for max tpc per gpc */ | 3416 | /* allocate for max tpc per gpc */ |
3304 | if (gr->sm_to_cluster == NULL) | 3417 | if (gr->sm_to_cluster == NULL) { |
3305 | gr->sm_to_cluster = nvgpu_kzalloc(g, gr->gpc_count * | 3418 | gr->sm_to_cluster = nvgpu_kzalloc(g, gr->gpc_count * |
3306 | gr->max_tpc_per_gpc_count * | 3419 | gr->max_tpc_per_gpc_count * |
3307 | sm_per_tpc * sizeof(struct sm_info)); | 3420 | sm_per_tpc * sizeof(struct sm_info)); |
3308 | else | 3421 | } else { |
3309 | memset(gr->sm_to_cluster, 0, gr->gpc_count * | 3422 | memset(gr->sm_to_cluster, 0, gr->gpc_count * |
3310 | gr->max_tpc_per_gpc_count * | 3423 | gr->max_tpc_per_gpc_count * |
3311 | sm_per_tpc * sizeof(struct sm_info)); | 3424 | sm_per_tpc * sizeof(struct sm_info)); |
3425 | } | ||
3312 | gr->no_of_sm = 0; | 3426 | gr->no_of_sm = 0; |
3313 | 3427 | ||
3314 | nvgpu_log_info(g, "fbps: %d", gr->num_fbps); | 3428 | nvgpu_log_info(g, "fbps: %d", gr->num_fbps); |
@@ -3387,14 +3501,16 @@ static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr) | |||
3387 | 3501 | ||
3388 | if (!nvgpu_mem_is_valid(&gr->mmu_wr_mem)) { | 3502 | if (!nvgpu_mem_is_valid(&gr->mmu_wr_mem)) { |
3389 | err = nvgpu_dma_alloc_sys(g, 0x1000, &gr->mmu_wr_mem); | 3503 | err = nvgpu_dma_alloc_sys(g, 0x1000, &gr->mmu_wr_mem); |
3390 | if (err) | 3504 | if (err) { |
3391 | goto err; | 3505 | goto err; |
3506 | } | ||
3392 | } | 3507 | } |
3393 | 3508 | ||
3394 | if (!nvgpu_mem_is_valid(&gr->mmu_rd_mem)) { | 3509 | if (!nvgpu_mem_is_valid(&gr->mmu_rd_mem)) { |
3395 | err = nvgpu_dma_alloc_sys(g, 0x1000, &gr->mmu_rd_mem); | 3510 | err = nvgpu_dma_alloc_sys(g, 0x1000, &gr->mmu_rd_mem); |
3396 | if (err) | 3511 | if (err) { |
3397 | goto err_free_wr_mem; | 3512 | goto err_free_wr_mem; |
3513 | } | ||
3398 | } | 3514 | } |
3399 | return 0; | 3515 | return 0; |
3400 | 3516 | ||
@@ -3446,11 +3562,11 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr) | |||
3446 | 3562 | ||
3447 | gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET; | 3563 | gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET; |
3448 | 3564 | ||
3449 | if (gr->tpc_count == 3) | 3565 | if (gr->tpc_count == 3) { |
3450 | gr->map_row_offset = 2; | 3566 | gr->map_row_offset = 2; |
3451 | else if (gr->tpc_count < 3) | 3567 | } else if (gr->tpc_count < 3) { |
3452 | gr->map_row_offset = 1; | 3568 | gr->map_row_offset = 1; |
3453 | else { | 3569 | } else { |
3454 | gr->map_row_offset = 3; | 3570 | gr->map_row_offset = 3; |
3455 | 3571 | ||
3456 | for (index = 1; index < 18; index++) { | 3572 | for (index = 1; index < 18; index++) { |
@@ -3487,13 +3603,15 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr) | |||
3487 | } | 3603 | } |
3488 | 3604 | ||
3489 | if (gr->map_tiles) { | 3605 | if (gr->map_tiles) { |
3490 | if (gr->map_tile_count != gr->tpc_count) | 3606 | if (gr->map_tile_count != gr->tpc_count) { |
3491 | delete_map = true; | 3607 | delete_map = true; |
3608 | } | ||
3492 | 3609 | ||
3493 | for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) { | 3610 | for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) { |
3494 | if (gr_gk20a_get_map_tile_count(gr, tile_count) | 3611 | if (gr_gk20a_get_map_tile_count(gr, tile_count) |
3495 | >= gr->tpc_count) | 3612 | >= gr->tpc_count) { |
3496 | delete_map = true; | 3613 | delete_map = true; |
3614 | } | ||
3497 | } | 3615 | } |
3498 | 3616 | ||
3499 | if (delete_map) { | 3617 | if (delete_map) { |
@@ -3540,10 +3658,11 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr) | |||
3540 | } | 3658 | } |
3541 | 3659 | ||
3542 | mul_factor = gr->gpc_count * max_tpc_count; | 3660 | mul_factor = gr->gpc_count * max_tpc_count; |
3543 | if (mul_factor & 0x1) | 3661 | if (mul_factor & 0x1) { |
3544 | mul_factor = 2; | 3662 | mul_factor = 2; |
3545 | else | 3663 | } else { |
3546 | mul_factor = 1; | 3664 | mul_factor = 1; |
3665 | } | ||
3547 | 3666 | ||
3548 | comm_denom = gr->gpc_count * max_tpc_count * mul_factor; | 3667 | comm_denom = gr->gpc_count * max_tpc_count * mul_factor; |
3549 | 3668 | ||
@@ -3552,10 +3671,11 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr) | |||
3552 | 3671 | ||
3553 | init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor; | 3672 | init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor; |
3554 | 3673 | ||
3555 | if (num_tpc != 0) | 3674 | if (num_tpc != 0) { |
3556 | init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2; | 3675 | init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2; |
3557 | else | 3676 | } else { |
3558 | init_err[gpc_index] = 0; | 3677 | init_err[gpc_index] = 0; |
3678 | } | ||
3559 | 3679 | ||
3560 | run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index]; | 3680 | run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index]; |
3561 | } | 3681 | } |
@@ -3565,8 +3685,9 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr) | |||
3565 | if ((run_err[gpc_index] * 2) >= comm_denom) { | 3685 | if ((run_err[gpc_index] * 2) >= comm_denom) { |
3566 | gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index]; | 3686 | gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index]; |
3567 | run_err[gpc_index] += init_frac[gpc_index] - comm_denom; | 3687 | run_err[gpc_index] += init_frac[gpc_index] - comm_denom; |
3568 | } else | 3688 | } else { |
3569 | run_err[gpc_index] += init_frac[gpc_index]; | 3689 | run_err[gpc_index] += init_frac[gpc_index]; |
3690 | } | ||
3570 | } | 3691 | } |
3571 | } | 3692 | } |
3572 | } | 3693 | } |
@@ -3578,10 +3699,11 @@ clean_up: | |||
3578 | nvgpu_kfree(g, sorted_num_tpcs); | 3699 | nvgpu_kfree(g, sorted_num_tpcs); |
3579 | nvgpu_kfree(g, sorted_to_unsorted_gpc_map); | 3700 | nvgpu_kfree(g, sorted_to_unsorted_gpc_map); |
3580 | 3701 | ||
3581 | if (ret) | 3702 | if (ret) { |
3582 | nvgpu_err(g, "fail"); | 3703 | nvgpu_err(g, "fail"); |
3583 | else | 3704 | } else { |
3584 | nvgpu_log_fn(g, "done"); | 3705 | nvgpu_log_fn(g, "done"); |
3706 | } | ||
3585 | 3707 | ||
3586 | return ret; | 3708 | return ret; |
3587 | } | 3709 | } |
@@ -3624,8 +3746,9 @@ int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, | |||
3624 | struct zcull_ctx_desc *zcull_ctx; | 3746 | struct zcull_ctx_desc *zcull_ctx; |
3625 | 3747 | ||
3626 | tsg = tsg_gk20a_from_ch(c); | 3748 | tsg = tsg_gk20a_from_ch(c); |
3627 | if (!tsg) | 3749 | if (!tsg) { |
3628 | return -EINVAL; | 3750 | return -EINVAL; |
3751 | } | ||
3629 | 3752 | ||
3630 | zcull_ctx = &tsg->gr_ctx.zcull_ctx; | 3753 | zcull_ctx = &tsg->gr_ctx.zcull_ctx; |
3631 | zcull_ctx->ctx_sw_mode = mode; | 3754 | zcull_ctx->ctx_sw_mode = mode; |
@@ -3815,8 +3938,9 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr, | |||
3815 | ret = g->ops.gr.add_zbc_color(g, gr, | 3938 | ret = g->ops.gr.add_zbc_color(g, gr, |
3816 | zbc_val, gr->max_used_color_index); | 3939 | zbc_val, gr->max_used_color_index); |
3817 | 3940 | ||
3818 | if (!ret) | 3941 | if (!ret) { |
3819 | gr->max_used_color_index++; | 3942 | gr->max_used_color_index++; |
3943 | } | ||
3820 | } | 3944 | } |
3821 | break; | 3945 | break; |
3822 | case GK20A_ZBC_TYPE_DEPTH: | 3946 | case GK20A_ZBC_TYPE_DEPTH: |
@@ -3845,8 +3969,9 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr, | |||
3845 | ret = g->ops.gr.add_zbc_depth(g, gr, | 3969 | ret = g->ops.gr.add_zbc_depth(g, gr, |
3846 | zbc_val, gr->max_used_depth_index); | 3970 | zbc_val, gr->max_used_depth_index); |
3847 | 3971 | ||
3848 | if (!ret) | 3972 | if (!ret) { |
3849 | gr->max_used_depth_index++; | 3973 | gr->max_used_depth_index++; |
3974 | } | ||
3850 | } | 3975 | } |
3851 | break; | 3976 | break; |
3852 | case T19X_ZBC: | 3977 | case T19X_ZBC: |
@@ -3956,8 +4081,9 @@ static int gr_gk20a_load_zbc_table(struct gk20a *g, struct gr_gk20a *gr) | |||
3956 | 4081 | ||
3957 | ret = g->ops.gr.add_zbc_color(g, gr, &zbc_val, i); | 4082 | ret = g->ops.gr.add_zbc_color(g, gr, &zbc_val, i); |
3958 | 4083 | ||
3959 | if (ret) | 4084 | if (ret) { |
3960 | return ret; | 4085 | return ret; |
4086 | } | ||
3961 | } | 4087 | } |
3962 | for (i = 0; i < gr->max_used_depth_index; i++) { | 4088 | for (i = 0; i < gr->max_used_depth_index; i++) { |
3963 | struct zbc_depth_table *d_tbl = &gr->zbc_dep_tbl[i]; | 4089 | struct zbc_depth_table *d_tbl = &gr->zbc_dep_tbl[i]; |
@@ -3968,14 +4094,16 @@ static int gr_gk20a_load_zbc_table(struct gk20a *g, struct gr_gk20a *gr) | |||
3968 | zbc_val.format = d_tbl->format; | 4094 | zbc_val.format = d_tbl->format; |
3969 | 4095 | ||
3970 | ret = g->ops.gr.add_zbc_depth(g, gr, &zbc_val, i); | 4096 | ret = g->ops.gr.add_zbc_depth(g, gr, &zbc_val, i); |
3971 | if (ret) | 4097 | if (ret) { |
3972 | return ret; | 4098 | return ret; |
4099 | } | ||
3973 | } | 4100 | } |
3974 | 4101 | ||
3975 | if (g->ops.gr.load_zbc_s_tbl) { | 4102 | if (g->ops.gr.load_zbc_s_tbl) { |
3976 | ret = g->ops.gr.load_zbc_s_tbl(g, gr); | 4103 | ret = g->ops.gr.load_zbc_s_tbl(g, gr); |
3977 | if (ret) | 4104 | if (ret) { |
3978 | return ret; | 4105 | return ret; |
4106 | } | ||
3979 | } | 4107 | } |
3980 | 4108 | ||
3981 | return 0; | 4109 | return 0; |
@@ -4131,13 +4259,14 @@ void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config) | |||
4131 | (engine_info->engine_enum == ENGINE_GR_GK20A)) { | 4259 | (engine_info->engine_enum == ENGINE_GR_GK20A)) { |
4132 | g->ops.therm.init_blcg_mode(g, mode_config, active_engine_id); | 4260 | g->ops.therm.init_blcg_mode(g, mode_config, active_engine_id); |
4133 | break; | 4261 | break; |
4134 | } else if (cgmode == ELCG_MODE) | 4262 | } else if (cgmode == ELCG_MODE) { |
4135 | g->ops.therm.init_elcg_mode(g, mode_config, | 4263 | g->ops.therm.init_elcg_mode(g, mode_config, |
4136 | active_engine_id); | 4264 | active_engine_id); |
4137 | else | 4265 | } else { |
4138 | nvgpu_err(g, "invalid cg mode %d, config %d for " | 4266 | nvgpu_err(g, "invalid cg mode %d, config %d for " |
4139 | "act_eng_id %d", | 4267 | "act_eng_id %d", |
4140 | cgmode, mode_config, active_engine_id); | 4268 | cgmode, mode_config, active_engine_id); |
4269 | } | ||
4141 | } | 4270 | } |
4142 | } | 4271 | } |
4143 | 4272 | ||
@@ -4257,8 +4386,9 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) | |||
4257 | u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc; | 4386 | u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc; |
4258 | u32 map_tile_count; | 4387 | u32 map_tile_count; |
4259 | 4388 | ||
4260 | if (!gr->map_tiles) | 4389 | if (!gr->map_tiles) { |
4261 | return -1; | 4390 | return -1; |
4391 | } | ||
4262 | 4392 | ||
4263 | if (zcull_alloc_num % 8 != 0) { | 4393 | if (zcull_alloc_num % 8 != 0) { |
4264 | /* Total 8 fields per map reg i.e. tile_0 to tile_7*/ | 4394 | /* Total 8 fields per map reg i.e. tile_0 to tile_7*/ |
@@ -4288,9 +4418,10 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) | |||
4288 | zcull_bank_counters[map_tile_count]++; | 4418 | zcull_bank_counters[map_tile_count]++; |
4289 | } | 4419 | } |
4290 | 4420 | ||
4291 | if (g->ops.gr.program_zcull_mapping) | 4421 | if (g->ops.gr.program_zcull_mapping) { |
4292 | g->ops.gr.program_zcull_mapping(g, zcull_alloc_num, | 4422 | g->ops.gr.program_zcull_mapping(g, zcull_alloc_num, |
4293 | zcull_map_tiles); | 4423 | zcull_map_tiles); |
4424 | } | ||
4294 | 4425 | ||
4295 | nvgpu_kfree(g, zcull_map_tiles); | 4426 | nvgpu_kfree(g, zcull_map_tiles); |
4296 | nvgpu_kfree(g, zcull_bank_counters); | 4427 | nvgpu_kfree(g, zcull_bank_counters); |
@@ -4307,8 +4438,9 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) | |||
4307 | return -EINVAL; | 4438 | return -EINVAL; |
4308 | } | 4439 | } |
4309 | if (gpc_zcull_count != gr->max_zcull_per_gpc_count && | 4440 | if (gpc_zcull_count != gr->max_zcull_per_gpc_count && |
4310 | gpc_zcull_count != 0) | 4441 | gpc_zcull_count != 0) { |
4311 | floorsweep = true; | 4442 | floorsweep = true; |
4443 | } | ||
4312 | } | 4444 | } |
4313 | 4445 | ||
4314 | /* ceil(1.0f / SM_NUM * gr_gpc0_zcull_sm_num_rcp_conservative__max_v()) */ | 4446 | /* ceil(1.0f / SM_NUM * gr_gpc0_zcull_sm_num_rcp_conservative__max_v()) */ |
@@ -4426,8 +4558,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4426 | fb_mmu_debug_rd_vol_false_f() | | 4558 | fb_mmu_debug_rd_vol_false_f() | |
4427 | fb_mmu_debug_rd_addr_f(addr)); | 4559 | fb_mmu_debug_rd_addr_f(addr)); |
4428 | 4560 | ||
4429 | if (g->ops.gr.init_gpc_mmu) | 4561 | if (g->ops.gr.init_gpc_mmu) { |
4430 | g->ops.gr.init_gpc_mmu(g); | 4562 | g->ops.gr.init_gpc_mmu(g); |
4563 | } | ||
4431 | 4564 | ||
4432 | /* load gr floorsweeping registers */ | 4565 | /* load gr floorsweeping registers */ |
4433 | data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r()); | 4566 | data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r()); |
@@ -4437,8 +4570,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4437 | 4570 | ||
4438 | gr_gk20a_zcull_init_hw(g, gr); | 4571 | gr_gk20a_zcull_init_hw(g, gr); |
4439 | 4572 | ||
4440 | if (g->ops.priv_ring.set_ppriv_timeout_settings) | 4573 | if (g->ops.priv_ring.set_ppriv_timeout_settings) { |
4441 | g->ops.priv_ring.set_ppriv_timeout_settings(g); | 4574 | g->ops.priv_ring.set_ppriv_timeout_settings(g); |
4575 | } | ||
4442 | 4576 | ||
4443 | /* enable fifo access */ | 4577 | /* enable fifo access */ |
4444 | gk20a_writel(g, gr_gpfifo_ctl_r(), | 4578 | gk20a_writel(g, gr_gpfifo_ctl_r(), |
@@ -4458,12 +4592,14 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4458 | g->ops.gr.set_hww_esr_report_mask(g); | 4592 | g->ops.gr.set_hww_esr_report_mask(g); |
4459 | 4593 | ||
4460 | /* enable TPC exceptions per GPC */ | 4594 | /* enable TPC exceptions per GPC */ |
4461 | if (g->ops.gr.enable_gpc_exceptions) | 4595 | if (g->ops.gr.enable_gpc_exceptions) { |
4462 | g->ops.gr.enable_gpc_exceptions(g); | 4596 | g->ops.gr.enable_gpc_exceptions(g); |
4597 | } | ||
4463 | 4598 | ||
4464 | /* enable ECC for L1/SM */ | 4599 | /* enable ECC for L1/SM */ |
4465 | if (g->ops.gr.ecc_init_scrub_reg) | 4600 | if (g->ops.gr.ecc_init_scrub_reg) { |
4466 | g->ops.gr.ecc_init_scrub_reg(g); | 4601 | g->ops.gr.ecc_init_scrub_reg(g); |
4602 | } | ||
4467 | 4603 | ||
4468 | /* TBD: enable per BE exceptions */ | 4604 | /* TBD: enable per BE exceptions */ |
4469 | 4605 | ||
@@ -4472,14 +4608,17 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4472 | 4608 | ||
4473 | gr_gk20a_load_zbc_table(g, gr); | 4609 | gr_gk20a_load_zbc_table(g, gr); |
4474 | 4610 | ||
4475 | if (g->ops.ltc.init_cbc) | 4611 | if (g->ops.ltc.init_cbc) { |
4476 | g->ops.ltc.init_cbc(g, gr); | 4612 | g->ops.ltc.init_cbc(g, gr); |
4613 | } | ||
4477 | 4614 | ||
4478 | if (g->ops.fb.init_cbc) | 4615 | if (g->ops.fb.init_cbc) { |
4479 | g->ops.fb.init_cbc(g, gr); | 4616 | g->ops.fb.init_cbc(g, gr); |
4617 | } | ||
4480 | 4618 | ||
4481 | if (g->ops.gr.disable_rd_coalesce) | 4619 | if (g->ops.gr.disable_rd_coalesce) { |
4482 | g->ops.gr.disable_rd_coalesce(g); | 4620 | g->ops.gr.disable_rd_coalesce(g); |
4621 | } | ||
4483 | 4622 | ||
4484 | /* load ctx init */ | 4623 | /* load ctx init */ |
4485 | for (i = 0; i < sw_ctx_load->count; i++) { | 4624 | for (i = 0; i < sw_ctx_load->count; i++) { |
@@ -4489,13 +4628,15 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4489 | 4628 | ||
4490 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), | 4629 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), |
4491 | GR_IDLE_CHECK_DEFAULT); | 4630 | GR_IDLE_CHECK_DEFAULT); |
4492 | if (err) | 4631 | if (err) { |
4493 | goto out; | 4632 | goto out; |
4633 | } | ||
4494 | 4634 | ||
4495 | if (g->ops.gr.init_preemption_state) { | 4635 | if (g->ops.gr.init_preemption_state) { |
4496 | err = g->ops.gr.init_preemption_state(g); | 4636 | err = g->ops.gr.init_preemption_state(g); |
4497 | if (err) | 4637 | if (err) { |
4498 | goto out; | 4638 | goto out; |
4639 | } | ||
4499 | } | 4640 | } |
4500 | 4641 | ||
4501 | /* disable fe_go_idle */ | 4642 | /* disable fe_go_idle */ |
@@ -4507,13 +4648,15 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4507 | 4648 | ||
4508 | /* floorsweep anything left */ | 4649 | /* floorsweep anything left */ |
4509 | err = g->ops.gr.init_fs_state(g); | 4650 | err = g->ops.gr.init_fs_state(g); |
4510 | if (err) | 4651 | if (err) { |
4511 | goto out; | 4652 | goto out; |
4653 | } | ||
4512 | 4654 | ||
4513 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), | 4655 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), |
4514 | GR_IDLE_CHECK_DEFAULT); | 4656 | GR_IDLE_CHECK_DEFAULT); |
4515 | if (err) | 4657 | if (err) { |
4516 | goto restore_fe_go_idle; | 4658 | goto restore_fe_go_idle; |
4659 | } | ||
4517 | 4660 | ||
4518 | restore_fe_go_idle: | 4661 | restore_fe_go_idle: |
4519 | /* restore fe_go_idle */ | 4662 | /* restore fe_go_idle */ |
@@ -4521,8 +4664,9 @@ restore_fe_go_idle: | |||
4521 | gr_fe_go_idle_timeout_count_prod_f()); | 4664 | gr_fe_go_idle_timeout_count_prod_f()); |
4522 | 4665 | ||
4523 | if (err || gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), | 4666 | if (err || gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), |
4524 | GR_IDLE_CHECK_DEFAULT)) | 4667 | GR_IDLE_CHECK_DEFAULT)) { |
4525 | goto out; | 4668 | goto out; |
4669 | } | ||
4526 | 4670 | ||
4527 | /* load method init */ | 4671 | /* load method init */ |
4528 | if (sw_method_init->count) { | 4672 | if (sw_method_init->count) { |
@@ -4556,40 +4700,51 @@ static void gr_gk20a_load_gating_prod(struct gk20a *g) | |||
4556 | nvgpu_log_fn(g, " "); | 4700 | nvgpu_log_fn(g, " "); |
4557 | 4701 | ||
4558 | /* slcg prod values */ | 4702 | /* slcg prod values */ |
4559 | if (g->ops.clock_gating.slcg_bus_load_gating_prod) | 4703 | if (g->ops.clock_gating.slcg_bus_load_gating_prod) { |
4560 | g->ops.clock_gating.slcg_bus_load_gating_prod(g, | 4704 | g->ops.clock_gating.slcg_bus_load_gating_prod(g, |
4561 | g->slcg_enabled); | 4705 | g->slcg_enabled); |
4562 | if (g->ops.clock_gating.slcg_chiplet_load_gating_prod) | 4706 | } |
4707 | if (g->ops.clock_gating.slcg_chiplet_load_gating_prod) { | ||
4563 | g->ops.clock_gating.slcg_chiplet_load_gating_prod(g, | 4708 | g->ops.clock_gating.slcg_chiplet_load_gating_prod(g, |
4564 | g->slcg_enabled); | 4709 | g->slcg_enabled); |
4565 | if (g->ops.clock_gating.slcg_gr_load_gating_prod) | 4710 | } |
4711 | if (g->ops.clock_gating.slcg_gr_load_gating_prod) { | ||
4566 | g->ops.clock_gating.slcg_gr_load_gating_prod(g, | 4712 | g->ops.clock_gating.slcg_gr_load_gating_prod(g, |
4567 | g->slcg_enabled); | 4713 | g->slcg_enabled); |
4568 | if (g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod) | 4714 | } |
4715 | if (g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod) { | ||
4569 | g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g, | 4716 | g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g, |
4570 | g->slcg_enabled); | 4717 | g->slcg_enabled); |
4571 | if (g->ops.clock_gating.slcg_perf_load_gating_prod) | 4718 | } |
4719 | if (g->ops.clock_gating.slcg_perf_load_gating_prod) { | ||
4572 | g->ops.clock_gating.slcg_perf_load_gating_prod(g, | 4720 | g->ops.clock_gating.slcg_perf_load_gating_prod(g, |
4573 | g->slcg_enabled); | 4721 | g->slcg_enabled); |
4574 | if (g->ops.clock_gating.slcg_xbar_load_gating_prod) | 4722 | } |
4723 | if (g->ops.clock_gating.slcg_xbar_load_gating_prod) { | ||
4575 | g->ops.clock_gating.slcg_xbar_load_gating_prod(g, | 4724 | g->ops.clock_gating.slcg_xbar_load_gating_prod(g, |
4576 | g->slcg_enabled); | 4725 | g->slcg_enabled); |
4726 | } | ||
4577 | 4727 | ||
4578 | /* blcg prod values */ | 4728 | /* blcg prod values */ |
4579 | if (g->ops.clock_gating.blcg_bus_load_gating_prod) | 4729 | if (g->ops.clock_gating.blcg_bus_load_gating_prod) { |
4580 | g->ops.clock_gating.blcg_bus_load_gating_prod(g, | 4730 | g->ops.clock_gating.blcg_bus_load_gating_prod(g, |
4581 | g->blcg_enabled); | 4731 | g->blcg_enabled); |
4582 | if (g->ops.clock_gating.blcg_gr_load_gating_prod) | 4732 | } |
4733 | if (g->ops.clock_gating.blcg_gr_load_gating_prod) { | ||
4583 | g->ops.clock_gating.blcg_gr_load_gating_prod(g, | 4734 | g->ops.clock_gating.blcg_gr_load_gating_prod(g, |
4584 | g->blcg_enabled); | 4735 | g->blcg_enabled); |
4585 | if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) | 4736 | } |
4737 | if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) { | ||
4586 | g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, | 4738 | g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, |
4587 | g->blcg_enabled); | 4739 | g->blcg_enabled); |
4588 | if (g->ops.clock_gating.blcg_xbar_load_gating_prod) | 4740 | } |
4741 | if (g->ops.clock_gating.blcg_xbar_load_gating_prod) { | ||
4589 | g->ops.clock_gating.blcg_xbar_load_gating_prod(g, | 4742 | g->ops.clock_gating.blcg_xbar_load_gating_prod(g, |
4590 | g->blcg_enabled); | 4743 | g->blcg_enabled); |
4591 | if (g->ops.clock_gating.pg_gr_load_gating_prod) | 4744 | } |
4745 | if (g->ops.clock_gating.pg_gr_load_gating_prod) { | ||
4592 | g->ops.clock_gating.pg_gr_load_gating_prod(g, true); | 4746 | g->ops.clock_gating.pg_gr_load_gating_prod(g, true); |
4747 | } | ||
4593 | 4748 | ||
4594 | nvgpu_log_fn(g, "done"); | 4749 | nvgpu_log_fn(g, "done"); |
4595 | } | 4750 | } |
@@ -4624,9 +4779,10 @@ static int gk20a_init_gr_prepare(struct gk20a *g) | |||
4624 | 4779 | ||
4625 | if (!g->gr.ctx_vars.valid) { | 4780 | if (!g->gr.ctx_vars.valid) { |
4626 | err = gr_gk20a_init_ctx_vars(g, &g->gr); | 4781 | err = gr_gk20a_init_ctx_vars(g, &g->gr); |
4627 | if (err) | 4782 | if (err) { |
4628 | nvgpu_err(g, | 4783 | nvgpu_err(g, |
4629 | "fail to load gr init ctx"); | 4784 | "fail to load gr init ctx"); |
4785 | } | ||
4630 | } | 4786 | } |
4631 | return err; | 4787 | return err; |
4632 | } | 4788 | } |
@@ -4669,18 +4825,21 @@ static int gr_gk20a_init_ctxsw(struct gk20a *g) | |||
4669 | u32 err = 0; | 4825 | u32 err = 0; |
4670 | 4826 | ||
4671 | err = g->ops.gr.load_ctxsw_ucode(g); | 4827 | err = g->ops.gr.load_ctxsw_ucode(g); |
4672 | if (err) | 4828 | if (err) { |
4673 | goto out; | 4829 | goto out; |
4830 | } | ||
4674 | 4831 | ||
4675 | err = gr_gk20a_wait_ctxsw_ready(g); | 4832 | err = gr_gk20a_wait_ctxsw_ready(g); |
4676 | if (err) | 4833 | if (err) { |
4677 | goto out; | 4834 | goto out; |
4835 | } | ||
4678 | 4836 | ||
4679 | out: | 4837 | out: |
4680 | if (err) | 4838 | if (err) { |
4681 | nvgpu_err(g, "fail"); | 4839 | nvgpu_err(g, "fail"); |
4682 | else | 4840 | } else { |
4683 | nvgpu_log_fn(g, "done"); | 4841 | nvgpu_log_fn(g, "done"); |
4842 | } | ||
4684 | 4843 | ||
4685 | return err; | 4844 | return err; |
4686 | } | 4845 | } |
@@ -4703,19 +4862,22 @@ static int gk20a_init_gr_reset_enable_hw(struct gk20a *g) | |||
4703 | } | 4862 | } |
4704 | 4863 | ||
4705 | err = gr_gk20a_wait_mem_scrubbing(g); | 4864 | err = gr_gk20a_wait_mem_scrubbing(g); |
4706 | if (err) | 4865 | if (err) { |
4707 | goto out; | 4866 | goto out; |
4867 | } | ||
4708 | 4868 | ||
4709 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), | 4869 | err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g), |
4710 | GR_IDLE_CHECK_DEFAULT); | 4870 | GR_IDLE_CHECK_DEFAULT); |
4711 | if (err) | 4871 | if (err) { |
4712 | goto out; | 4872 | goto out; |
4873 | } | ||
4713 | 4874 | ||
4714 | out: | 4875 | out: |
4715 | if (err) | 4876 | if (err) { |
4716 | nvgpu_err(g, "fail"); | 4877 | nvgpu_err(g, "fail"); |
4717 | else | 4878 | } else { |
4718 | nvgpu_log_fn(g, "done"); | 4879 | nvgpu_log_fn(g, "done"); |
4880 | } | ||
4719 | 4881 | ||
4720 | return 0; | 4882 | return 0; |
4721 | } | 4883 | } |
@@ -4774,42 +4936,51 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) | |||
4774 | #endif | 4936 | #endif |
4775 | 4937 | ||
4776 | err = gr_gk20a_init_gr_config(g, gr); | 4938 | err = gr_gk20a_init_gr_config(g, gr); |
4777 | if (err) | 4939 | if (err) { |
4778 | goto clean_up; | 4940 | goto clean_up; |
4941 | } | ||
4779 | 4942 | ||
4780 | err = gr_gk20a_init_mmu_sw(g, gr); | 4943 | err = gr_gk20a_init_mmu_sw(g, gr); |
4781 | if (err) | 4944 | if (err) { |
4782 | goto clean_up; | 4945 | goto clean_up; |
4946 | } | ||
4783 | 4947 | ||
4784 | err = gr_gk20a_init_map_tiles(g, gr); | 4948 | err = gr_gk20a_init_map_tiles(g, gr); |
4785 | if (err) | 4949 | if (err) { |
4786 | goto clean_up; | 4950 | goto clean_up; |
4951 | } | ||
4787 | 4952 | ||
4788 | if (g->ops.ltc.init_comptags) { | 4953 | if (g->ops.ltc.init_comptags) { |
4789 | err = g->ops.ltc.init_comptags(g, gr); | 4954 | err = g->ops.ltc.init_comptags(g, gr); |
4790 | if (err) | 4955 | if (err) { |
4791 | goto clean_up; | 4956 | goto clean_up; |
4957 | } | ||
4792 | } | 4958 | } |
4793 | 4959 | ||
4794 | err = gr_gk20a_init_zcull(g, gr); | 4960 | err = gr_gk20a_init_zcull(g, gr); |
4795 | if (err) | 4961 | if (err) { |
4796 | goto clean_up; | 4962 | goto clean_up; |
4963 | } | ||
4797 | 4964 | ||
4798 | err = g->ops.gr.alloc_global_ctx_buffers(g); | 4965 | err = g->ops.gr.alloc_global_ctx_buffers(g); |
4799 | if (err) | 4966 | if (err) { |
4800 | goto clean_up; | 4967 | goto clean_up; |
4968 | } | ||
4801 | 4969 | ||
4802 | err = gr_gk20a_init_access_map(g); | 4970 | err = gr_gk20a_init_access_map(g); |
4803 | if (err) | 4971 | if (err) { |
4804 | goto clean_up; | 4972 | goto clean_up; |
4973 | } | ||
4805 | 4974 | ||
4806 | gr_gk20a_load_zbc_default_table(g, gr); | 4975 | gr_gk20a_load_zbc_default_table(g, gr); |
4807 | 4976 | ||
4808 | if (g->ops.gr.init_czf_bypass) | 4977 | if (g->ops.gr.init_czf_bypass) { |
4809 | g->ops.gr.init_czf_bypass(g); | 4978 | g->ops.gr.init_czf_bypass(g); |
4979 | } | ||
4810 | 4980 | ||
4811 | if (g->ops.gr.init_gfxp_wfi_timeout_count) | 4981 | if (g->ops.gr.init_gfxp_wfi_timeout_count) { |
4812 | g->ops.gr.init_gfxp_wfi_timeout_count(g); | 4982 | g->ops.gr.init_gfxp_wfi_timeout_count(g); |
4983 | } | ||
4813 | 4984 | ||
4814 | err = nvgpu_mutex_init(&gr->ctx_mutex); | 4985 | err = nvgpu_mutex_init(&gr->ctx_mutex); |
4815 | if (err != 0) { | 4986 | if (err != 0) { |
@@ -4823,8 +4994,9 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) | |||
4823 | gr->sw_ready = true; | 4994 | gr->sw_ready = true; |
4824 | 4995 | ||
4825 | err = nvgpu_ecc_init_support(g); | 4996 | err = nvgpu_ecc_init_support(g); |
4826 | if (err) | 4997 | if (err) { |
4827 | goto clean_up; | 4998 | goto clean_up; |
4999 | } | ||
4828 | 5000 | ||
4829 | nvgpu_log_fn(g, "done"); | 5001 | nvgpu_log_fn(g, "done"); |
4830 | return 0; | 5002 | return 0; |
@@ -4895,27 +5067,32 @@ int gk20a_init_gr_support(struct gk20a *g) | |||
4895 | } | 5067 | } |
4896 | 5068 | ||
4897 | err = gr_gk20a_init_ctxsw(g); | 5069 | err = gr_gk20a_init_ctxsw(g); |
4898 | if (err) | 5070 | if (err) { |
4899 | return err; | 5071 | return err; |
5072 | } | ||
4900 | 5073 | ||
4901 | /* this appears query for sw states but fecs actually init | 5074 | /* this appears query for sw states but fecs actually init |
4902 | ramchain, etc so this is hw init */ | 5075 | ramchain, etc so this is hw init */ |
4903 | err = g->ops.gr.init_ctx_state(g); | 5076 | err = g->ops.gr.init_ctx_state(g); |
4904 | if (err) | 5077 | if (err) { |
4905 | return err; | 5078 | return err; |
5079 | } | ||
4906 | 5080 | ||
4907 | err = gk20a_init_gr_setup_sw(g); | 5081 | err = gk20a_init_gr_setup_sw(g); |
4908 | if (err) | 5082 | if (err) { |
4909 | return err; | 5083 | return err; |
5084 | } | ||
4910 | 5085 | ||
4911 | err = gk20a_init_gr_setup_hw(g); | 5086 | err = gk20a_init_gr_setup_hw(g); |
4912 | if (err) | 5087 | if (err) { |
4913 | return err; | 5088 | return err; |
5089 | } | ||
4914 | 5090 | ||
4915 | if (g->can_elpg) { | 5091 | if (g->can_elpg) { |
4916 | err = gk20a_init_gr_bind_fecs_elpg(g); | 5092 | err = gk20a_init_gr_bind_fecs_elpg(g); |
4917 | if (err) | 5093 | if (err) { |
4918 | return err; | 5094 | return err; |
5095 | } | ||
4919 | } | 5096 | } |
4920 | 5097 | ||
4921 | gr_gk20a_enable_elcg(g); | 5098 | gr_gk20a_enable_elcg(g); |
@@ -4991,12 +5168,14 @@ int gk20a_enable_gr_hw(struct gk20a *g) | |||
4991 | nvgpu_log_fn(g, " "); | 5168 | nvgpu_log_fn(g, " "); |
4992 | 5169 | ||
4993 | err = gk20a_init_gr_prepare(g); | 5170 | err = gk20a_init_gr_prepare(g); |
4994 | if (err) | 5171 | if (err) { |
4995 | return err; | 5172 | return err; |
5173 | } | ||
4996 | 5174 | ||
4997 | err = gk20a_init_gr_reset_enable_hw(g); | 5175 | err = gk20a_init_gr_reset_enable_hw(g); |
4998 | if (err) | 5176 | if (err) { |
4999 | return err; | 5177 | return err; |
5178 | } | ||
5000 | 5179 | ||
5001 | nvgpu_log_fn(g, "done"); | 5180 | nvgpu_log_fn(g, "done"); |
5002 | 5181 | ||
@@ -5042,8 +5221,9 @@ int gk20a_gr_reset(struct gk20a *g) | |||
5042 | /* this appears query for sw states but fecs actually init | 5221 | /* this appears query for sw states but fecs actually init |
5043 | ramchain, etc so this is hw init */ | 5222 | ramchain, etc so this is hw init */ |
5044 | err = g->ops.gr.init_ctx_state(g); | 5223 | err = g->ops.gr.init_ctx_state(g); |
5045 | if (err) | 5224 | if (err) { |
5046 | return err; | 5225 | return err; |
5226 | } | ||
5047 | 5227 | ||
5048 | size = 0; | 5228 | size = 0; |
5049 | err = gr_gk20a_fecs_get_reglist_img_size(g, &size); | 5229 | err = gr_gk20a_fecs_get_reglist_img_size(g, &size); |
@@ -5159,8 +5339,9 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch, | |||
5159 | u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r()); | 5339 | u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r()); |
5160 | int ret = 0; | 5340 | int ret = 0; |
5161 | 5341 | ||
5162 | if (!gr_fecs_intr) | 5342 | if (!gr_fecs_intr) { |
5163 | return 0; | 5343 | return 0; |
5344 | } | ||
5164 | 5345 | ||
5165 | if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) { | 5346 | if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) { |
5166 | gk20a_gr_set_error_notifier(g, isr_data, | 5347 | gk20a_gr_set_error_notifier(g, isr_data, |
@@ -5206,9 +5387,10 @@ static int gk20a_gr_handle_class_error(struct gk20a *g, | |||
5206 | 5387 | ||
5207 | nvgpu_err(g, "trapped data low 0x%08x", | 5388 | nvgpu_err(g, "trapped data low 0x%08x", |
5208 | gk20a_readl(g, gr_trapped_data_lo_r())); | 5389 | gk20a_readl(g, gr_trapped_data_lo_r())); |
5209 | if (gr_trapped_addr_datahigh_v(isr_data->addr)) | 5390 | if (gr_trapped_addr_datahigh_v(isr_data->addr)) { |
5210 | nvgpu_err(g, "trapped data high 0x%08x", | 5391 | nvgpu_err(g, "trapped data high 0x%08x", |
5211 | gk20a_readl(g, gr_trapped_data_hi_r())); | 5392 | gk20a_readl(g, gr_trapped_data_hi_r())); |
5393 | } | ||
5212 | 5394 | ||
5213 | return -EINVAL; | 5395 | return -EINVAL; |
5214 | } | 5396 | } |
@@ -5435,8 +5617,9 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( | |||
5435 | /* slow path */ | 5617 | /* slow path */ |
5436 | for (chid = 0; chid < f->num_channels; chid++) { | 5618 | for (chid = 0; chid < f->num_channels; chid++) { |
5437 | struct channel_gk20a *ch = &f->channel[chid]; | 5619 | struct channel_gk20a *ch = &f->channel[chid]; |
5438 | if (!gk20a_channel_get(ch)) | 5620 | if (!gk20a_channel_get(ch)) { |
5439 | continue; | 5621 | continue; |
5622 | } | ||
5440 | 5623 | ||
5441 | if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >> | 5624 | if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >> |
5442 | ram_in_base_shift_v()) == | 5625 | ram_in_base_shift_v()) == |
@@ -5449,8 +5632,9 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( | |||
5449 | gk20a_channel_put(ch); | 5632 | gk20a_channel_put(ch); |
5450 | } | 5633 | } |
5451 | 5634 | ||
5452 | if (!ret) | 5635 | if (!ret) { |
5453 | goto unlock; | 5636 | goto unlock; |
5637 | } | ||
5454 | 5638 | ||
5455 | /* add to free tlb entry */ | 5639 | /* add to free tlb entry */ |
5456 | for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { | 5640 | for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { |
@@ -5473,8 +5657,9 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( | |||
5473 | 5657 | ||
5474 | unlock: | 5658 | unlock: |
5475 | nvgpu_spinlock_release(&gr->ch_tlb_lock); | 5659 | nvgpu_spinlock_release(&gr->ch_tlb_lock); |
5476 | if (curr_tsgid) | 5660 | if (curr_tsgid) { |
5477 | *curr_tsgid = tsgid; | 5661 | *curr_tsgid = tsgid; |
5662 | } | ||
5478 | return ret; | 5663 | return ret; |
5479 | } | 5664 | } |
5480 | 5665 | ||
@@ -5507,8 +5692,9 @@ bool gk20a_gr_sm_debugger_attached(struct gk20a *g) | |||
5507 | * assumption: all SMs will have debug mode enabled/disabled | 5692 | * assumption: all SMs will have debug mode enabled/disabled |
5508 | * uniformly. */ | 5693 | * uniformly. */ |
5509 | if (gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(dbgr_control0) == | 5694 | if (gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(dbgr_control0) == |
5510 | gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v()) | 5695 | gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v()) { |
5511 | return true; | 5696 | return true; |
5697 | } | ||
5512 | 5698 | ||
5513 | return false; | 5699 | return false; |
5514 | } | 5700 | } |
@@ -5571,8 +5757,9 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, | |||
5571 | * Do not disable exceptions if the only SM exception is BPT_INT | 5757 | * Do not disable exceptions if the only SM exception is BPT_INT |
5572 | */ | 5758 | */ |
5573 | if ((global_esr == gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) | 5759 | if ((global_esr == gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) |
5574 | && (warp_esr == 0)) | 5760 | && (warp_esr == 0)) { |
5575 | disable_sm_exceptions = false; | 5761 | disable_sm_exceptions = false; |
5762 | } | ||
5576 | 5763 | ||
5577 | if (!ignore_debugger && disable_sm_exceptions) { | 5764 | if (!ignore_debugger && disable_sm_exceptions) { |
5578 | u32 tpc_exception_en = gk20a_readl(g, | 5765 | u32 tpc_exception_en = gk20a_readl(g, |
@@ -5661,16 +5848,18 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
5661 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, | 5848 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, |
5662 | "GPC%d TPC%d: SM exception pending", gpc, tpc); | 5849 | "GPC%d TPC%d: SM exception pending", gpc, tpc); |
5663 | 5850 | ||
5664 | if (g->ops.gr.handle_tpc_sm_ecc_exception) | 5851 | if (g->ops.gr.handle_tpc_sm_ecc_exception) { |
5665 | g->ops.gr.handle_tpc_sm_ecc_exception(g, gpc, tpc, | 5852 | g->ops.gr.handle_tpc_sm_ecc_exception(g, gpc, tpc, |
5666 | post_event, fault_ch, hww_global_esr); | 5853 | post_event, fault_ch, hww_global_esr); |
5854 | } | ||
5667 | 5855 | ||
5668 | g->ops.gr.get_esr_sm_sel(g, gpc, tpc, &esr_sm_sel); | 5856 | g->ops.gr.get_esr_sm_sel(g, gpc, tpc, &esr_sm_sel); |
5669 | 5857 | ||
5670 | for (sm = 0; sm < sm_per_tpc; sm++) { | 5858 | for (sm = 0; sm < sm_per_tpc; sm++) { |
5671 | 5859 | ||
5672 | if (!(esr_sm_sel & (1 << sm))) | 5860 | if (!(esr_sm_sel & (1 << sm))) { |
5673 | continue; | 5861 | continue; |
5862 | } | ||
5674 | 5863 | ||
5675 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, | 5864 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, |
5676 | "GPC%d TPC%d: SM%d exception pending", | 5865 | "GPC%d TPC%d: SM%d exception pending", |
@@ -5698,9 +5887,10 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
5698 | ret |= g->ops.gr.handle_tex_exception(g, gpc, tpc, post_event); | 5887 | ret |= g->ops.gr.handle_tex_exception(g, gpc, tpc, post_event); |
5699 | } | 5888 | } |
5700 | 5889 | ||
5701 | if (g->ops.gr.handle_tpc_mpc_exception) | 5890 | if (g->ops.gr.handle_tpc_mpc_exception) { |
5702 | ret |= g->ops.gr.handle_tpc_mpc_exception(g, | 5891 | ret |= g->ops.gr.handle_tpc_mpc_exception(g, |
5703 | gpc, tpc, post_event); | 5892 | gpc, tpc, post_event); |
5893 | } | ||
5704 | 5894 | ||
5705 | return ret; | 5895 | return ret; |
5706 | } | 5896 | } |
@@ -5717,8 +5907,9 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event, | |||
5717 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, " "); | 5907 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, " "); |
5718 | 5908 | ||
5719 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { | 5909 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { |
5720 | if ((exception1 & (1 << gpc)) == 0) | 5910 | if ((exception1 & (1 << gpc)) == 0) { |
5721 | continue; | 5911 | continue; |
5912 | } | ||
5722 | 5913 | ||
5723 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, | 5914 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, |
5724 | "GPC%d exception pending", gpc); | 5915 | "GPC%d exception pending", gpc); |
@@ -5731,8 +5922,9 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event, | |||
5731 | /* check if any tpc has an exception */ | 5922 | /* check if any tpc has an exception */ |
5732 | for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) { | 5923 | for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) { |
5733 | if ((gr_gpc0_gpccs_gpc_exception_tpc_v(gpc_exception) & | 5924 | if ((gr_gpc0_gpccs_gpc_exception_tpc_v(gpc_exception) & |
5734 | (1 << tpc)) == 0) | 5925 | (1 << tpc)) == 0) { |
5735 | continue; | 5926 | continue; |
5927 | } | ||
5736 | 5928 | ||
5737 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, | 5929 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, |
5738 | "GPC%d: TPC%d exception pending", gpc, tpc); | 5930 | "GPC%d: TPC%d exception pending", gpc, tpc); |
@@ -5776,11 +5968,13 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event, | |||
5776 | static int gk20a_gr_post_bpt_events(struct gk20a *g, struct tsg_gk20a *tsg, | 5968 | static int gk20a_gr_post_bpt_events(struct gk20a *g, struct tsg_gk20a *tsg, |
5777 | u32 global_esr) | 5969 | u32 global_esr) |
5778 | { | 5970 | { |
5779 | if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) | 5971 | if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) { |
5780 | g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_INT); | 5972 | g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_INT); |
5973 | } | ||
5781 | 5974 | ||
5782 | if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f()) | 5975 | if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f()) { |
5783 | g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_PAUSE); | 5976 | g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_PAUSE); |
5977 | } | ||
5784 | 5978 | ||
5785 | return 0; | 5979 | return 0; |
5786 | } | 5980 | } |
@@ -5802,12 +5996,14 @@ int gk20a_gr_isr(struct gk20a *g) | |||
5802 | nvgpu_log_fn(g, " "); | 5996 | nvgpu_log_fn(g, " "); |
5803 | nvgpu_log(g, gpu_dbg_intr, "pgraph intr %08x", gr_intr); | 5997 | nvgpu_log(g, gpu_dbg_intr, "pgraph intr %08x", gr_intr); |
5804 | 5998 | ||
5805 | if (!gr_intr) | 5999 | if (!gr_intr) { |
5806 | return 0; | 6000 | return 0; |
6001 | } | ||
5807 | 6002 | ||
5808 | gr_engine_id = gk20a_fifo_get_gr_engine_id(g); | 6003 | gr_engine_id = gk20a_fifo_get_gr_engine_id(g); |
5809 | if (gr_engine_id != FIFO_INVAL_ENGINE_ID) | 6004 | if (gr_engine_id != FIFO_INVAL_ENGINE_ID) { |
5810 | gr_engine_id = BIT(gr_engine_id); | 6005 | gr_engine_id = BIT(gr_engine_id); |
6006 | } | ||
5811 | 6007 | ||
5812 | grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r()); | 6008 | grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r()); |
5813 | grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1); | 6009 | grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1); |
@@ -5835,8 +6031,9 @@ int gk20a_gr_isr(struct gk20a *g) | |||
5835 | nvgpu_err(g, "ch id is INVALID 0xffffffff"); | 6031 | nvgpu_err(g, "ch id is INVALID 0xffffffff"); |
5836 | } | 6032 | } |
5837 | 6033 | ||
5838 | if (ch && gk20a_is_channel_marked_as_tsg(ch)) | 6034 | if (ch && gk20a_is_channel_marked_as_tsg(ch)) { |
5839 | tsg = &g->fifo.tsg[ch->tsgid]; | 6035 | tsg = &g->fifo.tsg[ch->tsgid]; |
6036 | } | ||
5840 | 6037 | ||
5841 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, | 6038 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, |
5842 | "channel %d: addr 0x%08x, " | 6039 | "channel %d: addr 0x%08x, " |
@@ -6047,18 +6244,19 @@ int gk20a_gr_isr(struct gk20a *g) | |||
6047 | } | 6244 | } |
6048 | 6245 | ||
6049 | if (need_reset) { | 6246 | if (need_reset) { |
6050 | if (tsgid != NVGPU_INVALID_TSG_ID) | 6247 | if (tsgid != NVGPU_INVALID_TSG_ID) { |
6051 | gk20a_fifo_recover(g, gr_engine_id, | 6248 | gk20a_fifo_recover(g, gr_engine_id, |
6052 | tsgid, true, true, true, | 6249 | tsgid, true, true, true, |
6053 | RC_TYPE_GR_FAULT); | 6250 | RC_TYPE_GR_FAULT); |
6054 | else if (ch) | 6251 | } else if (ch) { |
6055 | gk20a_fifo_recover(g, gr_engine_id, | 6252 | gk20a_fifo_recover(g, gr_engine_id, |
6056 | ch->chid, false, true, true, | 6253 | ch->chid, false, true, true, |
6057 | RC_TYPE_GR_FAULT); | 6254 | RC_TYPE_GR_FAULT); |
6058 | else | 6255 | } else { |
6059 | gk20a_fifo_recover(g, gr_engine_id, | 6256 | gk20a_fifo_recover(g, gr_engine_id, |
6060 | 0, false, false, true, | 6257 | 0, false, false, true, |
6061 | RC_TYPE_GR_FAULT); | 6258 | RC_TYPE_GR_FAULT); |
6259 | } | ||
6062 | } | 6260 | } |
6063 | 6261 | ||
6064 | if (gr_intr && !ch) { | 6262 | if (gr_intr && !ch) { |
@@ -6075,16 +6273,19 @@ int gk20a_gr_isr(struct gk20a *g) | |||
6075 | grfifo_ctl | gr_gpfifo_ctl_access_f(1) | | 6273 | grfifo_ctl | gr_gpfifo_ctl_access_f(1) | |
6076 | gr_gpfifo_ctl_semaphore_access_f(1)); | 6274 | gr_gpfifo_ctl_semaphore_access_f(1)); |
6077 | 6275 | ||
6078 | if (gr_intr) | 6276 | if (gr_intr) { |
6079 | nvgpu_err(g, | 6277 | nvgpu_err(g, |
6080 | "unhandled gr interrupt 0x%08x", gr_intr); | 6278 | "unhandled gr interrupt 0x%08x", gr_intr); |
6279 | } | ||
6081 | 6280 | ||
6082 | /* Posting of BPT events should be the last thing in this function */ | 6281 | /* Posting of BPT events should be the last thing in this function */ |
6083 | if (global_esr && tsg) | 6282 | if (global_esr && tsg) { |
6084 | gk20a_gr_post_bpt_events(g, tsg, global_esr); | 6283 | gk20a_gr_post_bpt_events(g, tsg, global_esr); |
6284 | } | ||
6085 | 6285 | ||
6086 | if (ch) | 6286 | if (ch) { |
6087 | gk20a_channel_put(ch); | 6287 | gk20a_channel_put(ch); |
6288 | } | ||
6088 | 6289 | ||
6089 | return 0; | 6290 | return 0; |
6090 | } | 6291 | } |
@@ -6166,8 +6367,9 @@ int gk20a_gr_suspend(struct gk20a *g) | |||
6166 | 6367 | ||
6167 | ret = g->ops.gr.wait_empty(g, gk20a_get_gr_idle_timeout(g), | 6368 | ret = g->ops.gr.wait_empty(g, gk20a_get_gr_idle_timeout(g), |
6168 | GR_IDLE_CHECK_DEFAULT); | 6369 | GR_IDLE_CHECK_DEFAULT); |
6169 | if (ret) | 6370 | if (ret) { |
6170 | return ret; | 6371 | return ret; |
6372 | } | ||
6171 | 6373 | ||
6172 | gk20a_writel(g, gr_gpfifo_ctl_r(), | 6374 | gk20a_writel(g, gr_gpfifo_ctl_r(), |
6173 | gr_gpfifo_ctl_access_disabled_f()); | 6375 | gr_gpfifo_ctl_access_disabled_f()); |
@@ -6227,8 +6429,9 @@ int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, | |||
6227 | if (pri_is_gpc_addr_shared(g, addr)) { | 6429 | if (pri_is_gpc_addr_shared(g, addr)) { |
6228 | *addr_type = CTXSW_ADDR_TYPE_GPC; | 6430 | *addr_type = CTXSW_ADDR_TYPE_GPC; |
6229 | *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC; | 6431 | *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC; |
6230 | } else | 6432 | } else { |
6231 | *gpc_num = pri_get_gpc_num(g, addr); | 6433 | *gpc_num = pri_get_gpc_num(g, addr); |
6434 | } | ||
6232 | 6435 | ||
6233 | if (pri_is_ppc_addr(g, gpc_addr)) { | 6436 | if (pri_is_ppc_addr(g, gpc_addr)) { |
6234 | *addr_type = CTXSW_ADDR_TYPE_PPC; | 6437 | *addr_type = CTXSW_ADDR_TYPE_PPC; |
@@ -6256,10 +6459,11 @@ int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, | |||
6256 | return 0; | 6459 | return 0; |
6257 | } else if (g->ops.ltc.pri_is_ltc_addr(g, addr)) { | 6460 | } else if (g->ops.ltc.pri_is_ltc_addr(g, addr)) { |
6258 | *addr_type = CTXSW_ADDR_TYPE_LTCS; | 6461 | *addr_type = CTXSW_ADDR_TYPE_LTCS; |
6259 | if (g->ops.ltc.is_ltcs_ltss_addr(g, addr)) | 6462 | if (g->ops.ltc.is_ltcs_ltss_addr(g, addr)) { |
6260 | *broadcast_flags |= PRI_BROADCAST_FLAGS_LTCS; | 6463 | *broadcast_flags |= PRI_BROADCAST_FLAGS_LTCS; |
6261 | else if (g->ops.ltc.is_ltcn_ltss_addr(g, addr)) | 6464 | } else if (g->ops.ltc.is_ltcn_ltss_addr(g, addr)) { |
6262 | *broadcast_flags |= PRI_BROADCAST_FLAGS_LTSS; | 6465 | *broadcast_flags |= PRI_BROADCAST_FLAGS_LTSS; |
6466 | } | ||
6263 | return 0; | 6467 | return 0; |
6264 | } else if (pri_is_fbpa_addr(g, addr)) { | 6468 | } else if (pri_is_fbpa_addr(g, addr)) { |
6265 | *addr_type = CTXSW_ADDR_TYPE_FBPA; | 6469 | *addr_type = CTXSW_ADDR_TYPE_FBPA; |
@@ -6338,8 +6542,9 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6338 | &gpc_num, &tpc_num, &ppc_num, &be_num, | 6542 | &gpc_num, &tpc_num, &ppc_num, &be_num, |
6339 | &broadcast_flags); | 6543 | &broadcast_flags); |
6340 | nvgpu_log(g, gpu_dbg_gpu_dbg, "addr_type = %d", addr_type); | 6544 | nvgpu_log(g, gpu_dbg_gpu_dbg, "addr_type = %d", addr_type); |
6341 | if (err) | 6545 | if (err) { |
6342 | return err; | 6546 | return err; |
6547 | } | ||
6343 | 6548 | ||
6344 | if ((addr_type == CTXSW_ADDR_TYPE_SYS) || | 6549 | if ((addr_type == CTXSW_ADDR_TYPE_SYS) || |
6345 | (addr_type == CTXSW_ADDR_TYPE_BE)) { | 6550 | (addr_type == CTXSW_ADDR_TYPE_BE)) { |
@@ -6347,10 +6552,11 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6347 | * table. Convert a BE unicast address to a broadcast address | 6552 | * table. Convert a BE unicast address to a broadcast address |
6348 | * so that we can look up the offset. */ | 6553 | * so that we can look up the offset. */ |
6349 | if ((addr_type == CTXSW_ADDR_TYPE_BE) && | 6554 | if ((addr_type == CTXSW_ADDR_TYPE_BE) && |
6350 | !(broadcast_flags & PRI_BROADCAST_FLAGS_BE)) | 6555 | !(broadcast_flags & PRI_BROADCAST_FLAGS_BE)) { |
6351 | priv_addr_table[t++] = pri_be_shared_addr(g, addr); | 6556 | priv_addr_table[t++] = pri_be_shared_addr(g, addr); |
6352 | else | 6557 | } else { |
6353 | priv_addr_table[t++] = addr; | 6558 | priv_addr_table[t++] = addr; |
6559 | } | ||
6354 | 6560 | ||
6355 | *num_registers = t; | 6561 | *num_registers = t; |
6356 | return 0; | 6562 | return 0; |
@@ -6362,7 +6568,7 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6362 | if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) { | 6568 | if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) { |
6363 | for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { | 6569 | for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { |
6364 | 6570 | ||
6365 | if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) | 6571 | if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) { |
6366 | for (tpc_num = 0; | 6572 | for (tpc_num = 0; |
6367 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; | 6573 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; |
6368 | tpc_num++) { | 6574 | tpc_num++) { |
@@ -6371,11 +6577,12 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6371 | gpc_num, tpc_num); | 6577 | gpc_num, tpc_num); |
6372 | } | 6578 | } |
6373 | 6579 | ||
6374 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) { | 6580 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) { |
6375 | err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num, | 6581 | err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num, |
6376 | priv_addr_table, &t); | 6582 | priv_addr_table, &t); |
6377 | if (err) | 6583 | if (err) { |
6378 | return err; | 6584 | return err; |
6585 | } | ||
6379 | } else { | 6586 | } else { |
6380 | priv_addr = pri_gpc_addr(g, | 6587 | priv_addr = pri_gpc_addr(g, |
6381 | pri_gpccs_addr_mask(addr), | 6588 | pri_gpccs_addr_mask(addr), |
@@ -6383,8 +6590,9 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6383 | 6590 | ||
6384 | gpc_addr = pri_gpccs_addr_mask(priv_addr); | 6591 | gpc_addr = pri_gpccs_addr_mask(priv_addr); |
6385 | tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); | 6592 | tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); |
6386 | if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) | 6593 | if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) { |
6387 | continue; | 6594 | continue; |
6595 | } | ||
6388 | 6596 | ||
6389 | priv_addr_table[t++] = priv_addr; | 6597 | priv_addr_table[t++] = priv_addr; |
6390 | } | 6598 | } |
@@ -6406,7 +6614,7 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6406 | nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS), | 6614 | nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS), |
6407 | priv_addr_table, &t); | 6615 | priv_addr_table, &t); |
6408 | } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) { | 6616 | } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) { |
6409 | if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) | 6617 | if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) { |
6410 | for (tpc_num = 0; | 6618 | for (tpc_num = 0; |
6411 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; | 6619 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; |
6412 | tpc_num++) { | 6620 | tpc_num++) { |
@@ -6414,11 +6622,12 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6414 | pri_tpc_addr(g, pri_tpccs_addr_mask(addr), | 6622 | pri_tpc_addr(g, pri_tpccs_addr_mask(addr), |
6415 | gpc_num, tpc_num); | 6623 | gpc_num, tpc_num); |
6416 | } | 6624 | } |
6417 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) | 6625 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) { |
6418 | err = gr_gk20a_split_ppc_broadcast_addr(g, | 6626 | err = gr_gk20a_split_ppc_broadcast_addr(g, |
6419 | addr, gpc_num, priv_addr_table, &t); | 6627 | addr, gpc_num, priv_addr_table, &t); |
6420 | else | 6628 | } else { |
6421 | priv_addr_table[t++] = addr; | 6629 | priv_addr_table[t++] = addr; |
6630 | } | ||
6422 | } | 6631 | } |
6423 | 6632 | ||
6424 | *num_registers = t; | 6633 | *num_registers = t; |
@@ -6450,8 +6659,9 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, | |||
6450 | return -EINVAL; | 6659 | return -EINVAL; |
6451 | } | 6660 | } |
6452 | 6661 | ||
6453 | if (!g->gr.ctx_vars.golden_image_initialized) | 6662 | if (!g->gr.ctx_vars.golden_image_initialized) { |
6454 | return -ENODEV; | 6663 | return -ENODEV; |
6664 | } | ||
6455 | 6665 | ||
6456 | priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets); | 6666 | priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets); |
6457 | if (!priv_registers) { | 6667 | if (!priv_registers) { |
@@ -6473,8 +6683,9 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, | |||
6473 | goto cleanup; | 6683 | goto cleanup; |
6474 | } | 6684 | } |
6475 | 6685 | ||
6476 | if ((max_offsets == 1) && (num_registers > 1)) | 6686 | if ((max_offsets == 1) && (num_registers > 1)) { |
6477 | num_registers = 1; | 6687 | num_registers = 1; |
6688 | } | ||
6478 | 6689 | ||
6479 | if (!g->gr.ctx_vars.local_golden_image) { | 6690 | if (!g->gr.ctx_vars.local_golden_image) { |
6480 | nvgpu_log_fn(g, "no context switch header info to work with"); | 6691 | nvgpu_log_fn(g, "no context switch header info to work with"); |
@@ -6501,8 +6712,9 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, | |||
6501 | 6712 | ||
6502 | *num_offsets = num_registers; | 6713 | *num_offsets = num_registers; |
6503 | cleanup: | 6714 | cleanup: |
6504 | if (!IS_ERR_OR_NULL(priv_registers)) | 6715 | if (!IS_ERR_OR_NULL(priv_registers)) { |
6505 | nvgpu_kfree(g, priv_registers); | 6716 | nvgpu_kfree(g, priv_registers); |
6717 | } | ||
6506 | 6718 | ||
6507 | return err; | 6719 | return err; |
6508 | } | 6720 | } |
@@ -6526,11 +6738,13 @@ int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g, | |||
6526 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | 6738 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); |
6527 | 6739 | ||
6528 | /* implementation is crossed-up if either of these happen */ | 6740 | /* implementation is crossed-up if either of these happen */ |
6529 | if (max_offsets > potential_offsets) | 6741 | if (max_offsets > potential_offsets) { |
6530 | return -EINVAL; | 6742 | return -EINVAL; |
6743 | } | ||
6531 | 6744 | ||
6532 | if (!g->gr.ctx_vars.golden_image_initialized) | 6745 | if (!g->gr.ctx_vars.golden_image_initialized) { |
6533 | return -ENODEV; | 6746 | return -ENODEV; |
6747 | } | ||
6534 | 6748 | ||
6535 | priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets); | 6749 | priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets); |
6536 | if (!priv_registers) { | 6750 | if (!priv_registers) { |
@@ -6549,8 +6763,9 @@ int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g, | |||
6549 | goto cleanup; | 6763 | goto cleanup; |
6550 | } | 6764 | } |
6551 | 6765 | ||
6552 | if ((max_offsets == 1) && (num_registers > 1)) | 6766 | if ((max_offsets == 1) && (num_registers > 1)) { |
6553 | num_registers = 1; | 6767 | num_registers = 1; |
6768 | } | ||
6554 | 6769 | ||
6555 | if (!g->gr.ctx_vars.local_golden_image) { | 6770 | if (!g->gr.ctx_vars.local_golden_image) { |
6556 | nvgpu_log_fn(g, "no context switch header info to work with"); | 6771 | nvgpu_log_fn(g, "no context switch header info to work with"); |
@@ -6591,8 +6806,9 @@ static u32 _ovr_perf_regs[17] = { 0, }; | |||
6591 | 6806 | ||
6592 | void gk20a_gr_init_ovr_sm_dsm_perf(void) | 6807 | void gk20a_gr_init_ovr_sm_dsm_perf(void) |
6593 | { | 6808 | { |
6594 | if (_ovr_perf_regs[0] != 0) | 6809 | if (_ovr_perf_regs[0] != 0) { |
6595 | return; | 6810 | return; |
6811 | } | ||
6596 | 6812 | ||
6597 | _ovr_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r(); | 6813 | _ovr_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r(); |
6598 | _ovr_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r(); | 6814 | _ovr_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r(); |
@@ -6640,8 +6856,9 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6640 | struct nvgpu_mem *ctxheader = &ch->ctx_header; | 6856 | struct nvgpu_mem *ctxheader = &ch->ctx_header; |
6641 | 6857 | ||
6642 | tsg = tsg_gk20a_from_ch(ch); | 6858 | tsg = tsg_gk20a_from_ch(ch); |
6643 | if (!tsg) | 6859 | if (!tsg) { |
6644 | return -EINVAL; | 6860 | return -EINVAL; |
6861 | } | ||
6645 | 6862 | ||
6646 | gr_ctx = &tsg->gr_ctx; | 6863 | gr_ctx = &tsg->gr_ctx; |
6647 | g->ops.gr.init_ovr_sm_dsm_perf(); | 6864 | g->ops.gr.init_ovr_sm_dsm_perf(); |
@@ -6657,16 +6874,18 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6657 | chk_addr = ((gpc_stride * gpc) + | 6874 | chk_addr = ((gpc_stride * gpc) + |
6658 | (tpc_in_gpc_stride * tpc) + | 6875 | (tpc_in_gpc_stride * tpc) + |
6659 | ovr_perf_regs[reg]); | 6876 | ovr_perf_regs[reg]); |
6660 | if (chk_addr != addr) | 6877 | if (chk_addr != addr) { |
6661 | continue; | 6878 | continue; |
6879 | } | ||
6662 | /* reset the patch count from previous | 6880 | /* reset the patch count from previous |
6663 | runs,if ucode has already processed | 6881 | runs,if ucode has already processed |
6664 | it */ | 6882 | it */ |
6665 | tmp = nvgpu_mem_rd(g, mem, | 6883 | tmp = nvgpu_mem_rd(g, mem, |
6666 | ctxsw_prog_main_image_patch_count_o()); | 6884 | ctxsw_prog_main_image_patch_count_o()); |
6667 | 6885 | ||
6668 | if (!tmp) | 6886 | if (!tmp) { |
6669 | gr_ctx->patch_ctx.data_count = 0; | 6887 | gr_ctx->patch_ctx.data_count = 0; |
6888 | } | ||
6670 | 6889 | ||
6671 | gr_gk20a_ctx_patch_write(g, gr_ctx, | 6890 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
6672 | addr, data, true); | 6891 | addr, data, true); |
@@ -6770,10 +6989,11 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6770 | u32 gpc_addr = 0; | 6989 | u32 gpc_addr = 0; |
6771 | gpc_num = pri_get_gpc_num(g, addr); | 6990 | gpc_num = pri_get_gpc_num(g, addr); |
6772 | gpc_addr = pri_gpccs_addr_mask(addr); | 6991 | gpc_addr = pri_gpccs_addr_mask(addr); |
6773 | if (g->ops.gr.is_tpc_addr(g, gpc_addr)) | 6992 | if (g->ops.gr.is_tpc_addr(g, gpc_addr)) { |
6774 | tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); | 6993 | tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); |
6775 | else | 6994 | } else { |
6776 | return -EINVAL; | 6995 | return -EINVAL; |
6996 | } | ||
6777 | 6997 | ||
6778 | nvgpu_log_info(g, " gpc = %d tpc = %d", | 6998 | nvgpu_log_info(g, " gpc = %d tpc = %d", |
6779 | gpc_num, tpc_num); | 6999 | gpc_num, tpc_num); |
@@ -6896,8 +7116,9 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6896 | } | 7116 | } |
6897 | 7117 | ||
6898 | if ((ILLEGAL_ID == sm_dsm_perf_ctrl_reg_id) && | 7118 | if ((ILLEGAL_ID == sm_dsm_perf_ctrl_reg_id) && |
6899 | (ILLEGAL_ID == sm_dsm_perf_reg_id)) | 7119 | (ILLEGAL_ID == sm_dsm_perf_reg_id)) { |
6900 | return -EINVAL; | 7120 | return -EINVAL; |
7121 | } | ||
6901 | 7122 | ||
6902 | /* Skip the FECS extended header, nothing there for us now. */ | 7123 | /* Skip the FECS extended header, nothing there for us now. */ |
6903 | offset_to_segment += buffer_segments_size; | 7124 | offset_to_segment += buffer_segments_size; |
@@ -6986,8 +7207,9 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, | |||
6986 | 7207 | ||
6987 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr); | 7208 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr); |
6988 | 7209 | ||
6989 | if (!g->gr.ctx_vars.valid) | 7210 | if (!g->gr.ctx_vars.valid) { |
6990 | return -EINVAL; | 7211 | return -EINVAL; |
7212 | } | ||
6991 | 7213 | ||
6992 | /* Process the SYS/BE segment. */ | 7214 | /* Process the SYS/BE segment. */ |
6993 | if ((addr_type == CTXSW_ADDR_TYPE_SYS) || | 7215 | if ((addr_type == CTXSW_ADDR_TYPE_SYS) || |
@@ -7032,8 +7254,9 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, | |||
7032 | } | 7254 | } |
7033 | } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) || | 7255 | } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) || |
7034 | (addr_type == CTXSW_ADDR_TYPE_ETPC)) { | 7256 | (addr_type == CTXSW_ADDR_TYPE_ETPC)) { |
7035 | if (!(g->ops.gr.get_egpc_base)) | 7257 | if (!(g->ops.gr.get_egpc_base)) { |
7036 | return -EINVAL; | 7258 | return -EINVAL; |
7259 | } | ||
7037 | 7260 | ||
7038 | for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) { | 7261 | for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) { |
7039 | for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.etpc.count; i++) { | 7262 | for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.etpc.count; i++) { |
@@ -7130,8 +7353,9 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, | |||
7130 | */ | 7353 | */ |
7131 | if ((!g->gr.ctx_vars.valid) || | 7354 | if ((!g->gr.ctx_vars.valid) || |
7132 | ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) && | 7355 | ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) && |
7133 | (num_pes_per_gpc > 1))) | 7356 | (num_pes_per_gpc > 1))) { |
7134 | return -EINVAL; | 7357 | return -EINVAL; |
7358 | } | ||
7135 | 7359 | ||
7136 | data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o()); | 7360 | data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o()); |
7137 | 7361 | ||
@@ -7237,8 +7461,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7237 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | 7461 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, |
7238 | "addr_type = %d, broadcast_flags: %08x", | 7462 | "addr_type = %d, broadcast_flags: %08x", |
7239 | addr_type, broadcast_flags); | 7463 | addr_type, broadcast_flags); |
7240 | if (err) | 7464 | if (err) { |
7241 | return err; | 7465 | return err; |
7466 | } | ||
7242 | 7467 | ||
7243 | context = (u8 *)context_buffer; | 7468 | context = (u8 *)context_buffer; |
7244 | if (!check_main_image_header_magic(context)) { | 7469 | if (!check_main_image_header_magic(context)) { |
@@ -7283,8 +7508,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7283 | addr_type, addr, | 7508 | addr_type, addr, |
7284 | 0, 0, 0, 0, | 7509 | 0, 0, 0, 0, |
7285 | &offset); | 7510 | &offset); |
7286 | if (err) | 7511 | if (err) { |
7287 | return err; | 7512 | return err; |
7513 | } | ||
7288 | 7514 | ||
7289 | *priv_offset = (offset_to_segment + offset); | 7515 | *priv_offset = (offset_to_segment + offset); |
7290 | return 0; | 7516 | return 0; |
@@ -7339,8 +7565,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7339 | addr_type, | 7565 | addr_type, |
7340 | num_tpcs, num_ppcs, reg_list_ppc_count, | 7566 | num_tpcs, num_ppcs, reg_list_ppc_count, |
7341 | &offset_in_segment); | 7567 | &offset_in_segment); |
7342 | if (err) | 7568 | if (err) { |
7343 | return -EINVAL; | 7569 | return -EINVAL; |
7570 | } | ||
7344 | 7571 | ||
7345 | offset_to_segment += offset_in_segment; | 7572 | offset_to_segment += offset_in_segment; |
7346 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | 7573 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, |
@@ -7352,8 +7579,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7352 | i, num_tpcs, | 7579 | i, num_tpcs, |
7353 | num_ppcs, ppc_mask, | 7580 | num_ppcs, ppc_mask, |
7354 | &offset); | 7581 | &offset); |
7355 | if (err) | 7582 | if (err) { |
7356 | return -EINVAL; | 7583 | return -EINVAL; |
7584 | } | ||
7357 | 7585 | ||
7358 | *priv_offset = offset_to_segment + offset; | 7586 | *priv_offset = offset_to_segment + offset; |
7359 | return 0; | 7587 | return 0; |
@@ -7370,11 +7598,13 @@ static int map_cmp(const void *a, const void *b) | |||
7370 | struct ctxsw_buf_offset_map_entry *e2 = | 7598 | struct ctxsw_buf_offset_map_entry *e2 = |
7371 | (struct ctxsw_buf_offset_map_entry *)b; | 7599 | (struct ctxsw_buf_offset_map_entry *)b; |
7372 | 7600 | ||
7373 | if (e1->addr < e2->addr) | 7601 | if (e1->addr < e2->addr) { |
7374 | return -1; | 7602 | return -1; |
7603 | } | ||
7375 | 7604 | ||
7376 | if (e1->addr > e2->addr) | 7605 | if (e1->addr > e2->addr) { |
7377 | return 1; | 7606 | return 1; |
7607 | } | ||
7378 | return 0; | 7608 | return 0; |
7379 | } | 7609 | } |
7380 | 7610 | ||
@@ -7387,15 +7617,17 @@ static int add_ctxsw_buffer_map_entries_pmsys(struct ctxsw_buf_offset_map_entry | |||
7387 | u32 cnt = *count; | 7617 | u32 cnt = *count; |
7388 | u32 off = *offset; | 7618 | u32 off = *offset; |
7389 | 7619 | ||
7390 | if ((cnt + regs->count) > max_cnt) | 7620 | if ((cnt + regs->count) > max_cnt) { |
7391 | return -EINVAL; | 7621 | return -EINVAL; |
7622 | } | ||
7392 | 7623 | ||
7393 | for (idx = 0; idx < regs->count; idx++) { | 7624 | for (idx = 0; idx < regs->count; idx++) { |
7394 | if ((base + (regs->l[idx].addr & mask)) < 0xFFF) | 7625 | if ((base + (regs->l[idx].addr & mask)) < 0xFFF) { |
7395 | map[cnt].addr = base + (regs->l[idx].addr & mask) | 7626 | map[cnt].addr = base + (regs->l[idx].addr & mask) |
7396 | + NV_PCFG_BASE; | 7627 | + NV_PCFG_BASE; |
7397 | else | 7628 | } else { |
7398 | map[cnt].addr = base + (regs->l[idx].addr & mask); | 7629 | map[cnt].addr = base + (regs->l[idx].addr & mask); |
7630 | } | ||
7399 | map[cnt++].offset = off; | 7631 | map[cnt++].offset = off; |
7400 | off += 4; | 7632 | off += 4; |
7401 | } | 7633 | } |
@@ -7414,8 +7646,9 @@ static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g, | |||
7414 | u32 cnt = *count; | 7646 | u32 cnt = *count; |
7415 | u32 off = *offset; | 7647 | u32 off = *offset; |
7416 | 7648 | ||
7417 | if ((cnt + regs->count) > max_cnt) | 7649 | if ((cnt + regs->count) > max_cnt) { |
7418 | return -EINVAL; | 7650 | return -EINVAL; |
7651 | } | ||
7419 | 7652 | ||
7420 | /* NOTE: The PPC offsets get added to the pm_gpc list if numPpc <= 1 | 7653 | /* NOTE: The PPC offsets get added to the pm_gpc list if numPpc <= 1 |
7421 | * To handle the case of PPC registers getting added into GPC, the below | 7654 | * To handle the case of PPC registers getting added into GPC, the below |
@@ -7434,8 +7667,9 @@ static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g, | |||
7434 | 7667 | ||
7435 | map[cnt].addr = base + ppc_in_gpc_base | 7668 | map[cnt].addr = base + ppc_in_gpc_base |
7436 | + (regs->l[idx].addr & ppcmask); | 7669 | + (regs->l[idx].addr & ppcmask); |
7437 | } else | 7670 | } else { |
7438 | map[cnt].addr = base + (regs->l[idx].addr & mask); | 7671 | map[cnt].addr = base + (regs->l[idx].addr & mask); |
7672 | } | ||
7439 | map[cnt++].offset = off; | 7673 | map[cnt++].offset = off; |
7440 | off += 4; | 7674 | off += 4; |
7441 | } | 7675 | } |
@@ -7453,8 +7687,9 @@ static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map, | |||
7453 | u32 cnt = *count; | 7687 | u32 cnt = *count; |
7454 | u32 off = *offset; | 7688 | u32 off = *offset; |
7455 | 7689 | ||
7456 | if ((cnt + regs->count) > max_cnt) | 7690 | if ((cnt + regs->count) > max_cnt) { |
7457 | return -EINVAL; | 7691 | return -EINVAL; |
7692 | } | ||
7458 | 7693 | ||
7459 | for (idx = 0; idx < regs->count; idx++) { | 7694 | for (idx = 0; idx < regs->count; idx++) { |
7460 | map[cnt].addr = base + (regs->l[idx].addr & mask); | 7695 | map[cnt].addr = base + (regs->l[idx].addr & mask); |
@@ -7481,8 +7716,9 @@ static int add_ctxsw_buffer_map_entries_subunits( | |||
7481 | u32 cnt = *count; | 7716 | u32 cnt = *count; |
7482 | u32 off = *offset; | 7717 | u32 off = *offset; |
7483 | 7718 | ||
7484 | if ((cnt + (regs->count * num_units)) > max_cnt) | 7719 | if ((cnt + (regs->count * num_units)) > max_cnt) { |
7485 | return -EINVAL; | 7720 | return -EINVAL; |
7721 | } | ||
7486 | 7722 | ||
7487 | /* Data is interleaved for units in ctxsw buffer */ | 7723 | /* Data is interleaved for units in ctxsw buffer */ |
7488 | for (idx = 0; idx < regs->count; idx++) { | 7724 | for (idx = 0; idx < regs->count; idx++) { |
@@ -7529,8 +7765,9 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, | |||
7529 | &g->gr.ctx_vars.ctxsw_regs.pm_tpc, | 7765 | &g->gr.ctx_vars.ctxsw_regs.pm_tpc, |
7530 | count, offset, max_cnt, base, num_tpcs, | 7766 | count, offset, max_cnt, base, num_tpcs, |
7531 | tpc_in_gpc_stride, | 7767 | tpc_in_gpc_stride, |
7532 | (tpc_in_gpc_stride - 1))) | 7768 | (tpc_in_gpc_stride - 1))) { |
7533 | return -EINVAL; | 7769 | return -EINVAL; |
7770 | } | ||
7534 | 7771 | ||
7535 | num_ppcs = g->gr.gpc_ppc_count[gpc_num]; | 7772 | num_ppcs = g->gr.gpc_ppc_count[gpc_num]; |
7536 | base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base; | 7773 | base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base; |
@@ -7538,33 +7775,38 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, | |||
7538 | &g->gr.ctx_vars.ctxsw_regs.pm_ppc, | 7775 | &g->gr.ctx_vars.ctxsw_regs.pm_ppc, |
7539 | count, offset, max_cnt, base, num_ppcs, | 7776 | count, offset, max_cnt, base, num_ppcs, |
7540 | ppc_in_gpc_stride, | 7777 | ppc_in_gpc_stride, |
7541 | (ppc_in_gpc_stride - 1))) | 7778 | (ppc_in_gpc_stride - 1))) { |
7542 | return -EINVAL; | 7779 | return -EINVAL; |
7780 | } | ||
7543 | 7781 | ||
7544 | base = gpc_base + (gpc_stride * gpc_num); | 7782 | base = gpc_base + (gpc_stride * gpc_num); |
7545 | if (add_ctxsw_buffer_map_entries_pmgpc(g, map, | 7783 | if (add_ctxsw_buffer_map_entries_pmgpc(g, map, |
7546 | &g->gr.ctx_vars.ctxsw_regs.pm_gpc, | 7784 | &g->gr.ctx_vars.ctxsw_regs.pm_gpc, |
7547 | count, offset, max_cnt, base, | 7785 | count, offset, max_cnt, base, |
7548 | (gpc_stride - 1))) | 7786 | (gpc_stride - 1))) { |
7549 | return -EINVAL; | 7787 | return -EINVAL; |
7788 | } | ||
7550 | 7789 | ||
7551 | base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num; | 7790 | base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num; |
7552 | if (add_ctxsw_buffer_map_entries(map, | 7791 | if (add_ctxsw_buffer_map_entries(map, |
7553 | &g->gr.ctx_vars.ctxsw_regs.pm_ucgpc, | 7792 | &g->gr.ctx_vars.ctxsw_regs.pm_ucgpc, |
7554 | count, offset, max_cnt, base, ~0)) | 7793 | count, offset, max_cnt, base, ~0)) { |
7555 | return -EINVAL; | 7794 | return -EINVAL; |
7795 | } | ||
7556 | 7796 | ||
7557 | base = (g->ops.gr.get_pmm_per_chiplet_offset() * gpc_num); | 7797 | base = (g->ops.gr.get_pmm_per_chiplet_offset() * gpc_num); |
7558 | if (add_ctxsw_buffer_map_entries(map, | 7798 | if (add_ctxsw_buffer_map_entries(map, |
7559 | &g->gr.ctx_vars.ctxsw_regs.perf_gpc, | 7799 | &g->gr.ctx_vars.ctxsw_regs.perf_gpc, |
7560 | count, offset, max_cnt, base, ~0)) | 7800 | count, offset, max_cnt, base, ~0)) { |
7561 | return -EINVAL; | 7801 | return -EINVAL; |
7802 | } | ||
7562 | 7803 | ||
7563 | base = (NV_PERF_PMMGPCROUTER_STRIDE * gpc_num); | 7804 | base = (NV_PERF_PMMGPCROUTER_STRIDE * gpc_num); |
7564 | if (add_ctxsw_buffer_map_entries(map, | 7805 | if (add_ctxsw_buffer_map_entries(map, |
7565 | &g->gr.ctx_vars.ctxsw_regs.gpc_router, | 7806 | &g->gr.ctx_vars.ctxsw_regs.gpc_router, |
7566 | count, offset, max_cnt, base, ~0)) | 7807 | count, offset, max_cnt, base, ~0)) { |
7567 | return -EINVAL; | 7808 | return -EINVAL; |
7809 | } | ||
7568 | 7810 | ||
7569 | /* Counter Aggregation Unit, if available */ | 7811 | /* Counter Aggregation Unit, if available */ |
7570 | if (g->gr.ctx_vars.ctxsw_regs.pm_cau.count) { | 7812 | if (g->gr.ctx_vars.ctxsw_regs.pm_cau.count) { |
@@ -7574,8 +7816,9 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, | |||
7574 | &g->gr.ctx_vars.ctxsw_regs.pm_cau, | 7816 | &g->gr.ctx_vars.ctxsw_regs.pm_cau, |
7575 | count, offset, max_cnt, base, num_tpcs, | 7817 | count, offset, max_cnt, base, num_tpcs, |
7576 | tpc_in_gpc_stride, | 7818 | tpc_in_gpc_stride, |
7577 | (tpc_in_gpc_stride - 1))) | 7819 | (tpc_in_gpc_stride - 1))) { |
7578 | return -EINVAL; | 7820 | return -EINVAL; |
7821 | } | ||
7579 | } | 7822 | } |
7580 | 7823 | ||
7581 | *offset = ALIGN(*offset, 256); | 7824 | *offset = ALIGN(*offset, 256); |
@@ -7678,28 +7921,33 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) | |||
7678 | map_size = hwpm_ctxsw_reg_count_max * sizeof(*map); | 7921 | map_size = hwpm_ctxsw_reg_count_max * sizeof(*map); |
7679 | 7922 | ||
7680 | map = nvgpu_big_zalloc(g, map_size); | 7923 | map = nvgpu_big_zalloc(g, map_size); |
7681 | if (!map) | 7924 | if (!map) { |
7682 | return -ENOMEM; | 7925 | return -ENOMEM; |
7926 | } | ||
7683 | 7927 | ||
7684 | /* Add entries from _LIST_pm_ctx_reg_SYS */ | 7928 | /* Add entries from _LIST_pm_ctx_reg_SYS */ |
7685 | if (add_ctxsw_buffer_map_entries_pmsys(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys, | 7929 | if (add_ctxsw_buffer_map_entries_pmsys(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys, |
7686 | &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) | 7930 | &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) { |
7687 | goto cleanup; | 7931 | goto cleanup; |
7932 | } | ||
7688 | 7933 | ||
7689 | /* Add entries from _LIST_nv_perf_ctx_reg_SYS */ | 7934 | /* Add entries from _LIST_nv_perf_ctx_reg_SYS */ |
7690 | if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys, | 7935 | if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys, |
7691 | &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) | 7936 | &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) { |
7692 | goto cleanup; | 7937 | goto cleanup; |
7938 | } | ||
7693 | 7939 | ||
7694 | /* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/ | 7940 | /* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/ |
7695 | if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router, | 7941 | if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router, |
7696 | &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) | 7942 | &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) { |
7697 | goto cleanup; | 7943 | goto cleanup; |
7944 | } | ||
7698 | 7945 | ||
7699 | /* Add entries from _LIST_nv_perf_pma_ctx_reg*/ | 7946 | /* Add entries from _LIST_nv_perf_pma_ctx_reg*/ |
7700 | if (g->ops.gr.add_ctxsw_reg_perf_pma(map, &g->gr.ctx_vars.ctxsw_regs.perf_pma, | 7947 | if (g->ops.gr.add_ctxsw_reg_perf_pma(map, &g->gr.ctx_vars.ctxsw_regs.perf_pma, |
7701 | &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) | 7948 | &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) { |
7702 | goto cleanup; | 7949 | goto cleanup; |
7950 | } | ||
7703 | 7951 | ||
7704 | offset = ALIGN(offset, 256); | 7952 | offset = ALIGN(offset, 256); |
7705 | 7953 | ||
@@ -7710,46 +7958,52 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) | |||
7710 | hwpm_ctxsw_reg_count_max, 0, | 7958 | hwpm_ctxsw_reg_count_max, 0, |
7711 | g->gr.num_fbps, | 7959 | g->gr.num_fbps, |
7712 | g->ops.gr.get_pmm_per_chiplet_offset(), | 7960 | g->ops.gr.get_pmm_per_chiplet_offset(), |
7713 | ~0)) | 7961 | ~0)) { |
7714 | goto cleanup; | 7962 | goto cleanup; |
7963 | } | ||
7715 | 7964 | ||
7716 | /* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */ | 7965 | /* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */ |
7717 | if (add_ctxsw_buffer_map_entries_subunits(map, | 7966 | if (add_ctxsw_buffer_map_entries_subunits(map, |
7718 | &g->gr.ctx_vars.ctxsw_regs.fbp_router, | 7967 | &g->gr.ctx_vars.ctxsw_regs.fbp_router, |
7719 | &count, &offset, | 7968 | &count, &offset, |
7720 | hwpm_ctxsw_reg_count_max, 0, g->gr.num_fbps, | 7969 | hwpm_ctxsw_reg_count_max, 0, g->gr.num_fbps, |
7721 | NV_PERF_PMM_FBP_ROUTER_STRIDE, ~0)) | 7970 | NV_PERF_PMM_FBP_ROUTER_STRIDE, ~0)) { |
7722 | goto cleanup; | 7971 | goto cleanup; |
7972 | } | ||
7723 | 7973 | ||
7724 | /* Add entries from _LIST_nv_pm_fbpa_ctx_regs */ | 7974 | /* Add entries from _LIST_nv_pm_fbpa_ctx_regs */ |
7725 | if (g->ops.gr.add_ctxsw_reg_pm_fbpa(g, map, | 7975 | if (g->ops.gr.add_ctxsw_reg_pm_fbpa(g, map, |
7726 | &g->gr.ctx_vars.ctxsw_regs.pm_fbpa, | 7976 | &g->gr.ctx_vars.ctxsw_regs.pm_fbpa, |
7727 | &count, &offset, | 7977 | &count, &offset, |
7728 | hwpm_ctxsw_reg_count_max, 0, | 7978 | hwpm_ctxsw_reg_count_max, 0, |
7729 | num_fbpas, fbpa_stride, ~0)) | 7979 | num_fbpas, fbpa_stride, ~0)) { |
7730 | goto cleanup; | 7980 | goto cleanup; |
7981 | } | ||
7731 | 7982 | ||
7732 | /* Add entries from _LIST_nv_pm_rop_ctx_regs */ | 7983 | /* Add entries from _LIST_nv_pm_rop_ctx_regs */ |
7733 | if (add_ctxsw_buffer_map_entries(map, | 7984 | if (add_ctxsw_buffer_map_entries(map, |
7734 | &g->gr.ctx_vars.ctxsw_regs.pm_rop, | 7985 | &g->gr.ctx_vars.ctxsw_regs.pm_rop, |
7735 | &count, &offset, | 7986 | &count, &offset, |
7736 | hwpm_ctxsw_reg_count_max, 0, ~0)) | 7987 | hwpm_ctxsw_reg_count_max, 0, ~0)) { |
7737 | goto cleanup; | 7988 | goto cleanup; |
7989 | } | ||
7738 | 7990 | ||
7739 | /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */ | 7991 | /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */ |
7740 | if (add_ctxsw_buffer_map_entries_subunits(map, | 7992 | if (add_ctxsw_buffer_map_entries_subunits(map, |
7741 | &g->gr.ctx_vars.ctxsw_regs.pm_ltc, | 7993 | &g->gr.ctx_vars.ctxsw_regs.pm_ltc, |
7742 | &count, &offset, | 7994 | &count, &offset, |
7743 | hwpm_ctxsw_reg_count_max, 0, | 7995 | hwpm_ctxsw_reg_count_max, 0, |
7744 | num_ltc, ltc_stride, ~0)) | 7996 | num_ltc, ltc_stride, ~0)) { |
7745 | goto cleanup; | 7997 | goto cleanup; |
7998 | } | ||
7746 | 7999 | ||
7747 | offset = ALIGN(offset, 256); | 8000 | offset = ALIGN(offset, 256); |
7748 | 8001 | ||
7749 | /* Add GPC entries */ | 8002 | /* Add GPC entries */ |
7750 | if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset, | 8003 | if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset, |
7751 | hwpm_ctxsw_reg_count_max)) | 8004 | hwpm_ctxsw_reg_count_max)) { |
7752 | goto cleanup; | 8005 | goto cleanup; |
8006 | } | ||
7753 | 8007 | ||
7754 | if (offset > hwpm_ctxsw_buffer_size) { | 8008 | if (offset > hwpm_ctxsw_buffer_size) { |
7755 | nvgpu_err(g, "offset > buffer size"); | 8009 | nvgpu_err(g, "offset > buffer size"); |
@@ -7792,8 +8046,9 @@ static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g, | |||
7792 | /* Create map of pri address and pm offset if necessary */ | 8046 | /* Create map of pri address and pm offset if necessary */ |
7793 | if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map == NULL) { | 8047 | if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map == NULL) { |
7794 | err = gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(g); | 8048 | err = gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(g); |
7795 | if (err) | 8049 | if (err) { |
7796 | return err; | 8050 | return err; |
8051 | } | ||
7797 | } | 8052 | } |
7798 | 8053 | ||
7799 | *priv_offset = 0; | 8054 | *priv_offset = 0; |
@@ -7804,9 +8059,9 @@ static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g, | |||
7804 | map_key.addr = addr; | 8059 | map_key.addr = addr; |
7805 | result = bsearch(&map_key, map, count, sizeof(*map), map_cmp); | 8060 | result = bsearch(&map_key, map, count, sizeof(*map), map_cmp); |
7806 | 8061 | ||
7807 | if (result) | 8062 | if (result) { |
7808 | *priv_offset = result->offset; | 8063 | *priv_offset = result->offset; |
7809 | else { | 8064 | } else { |
7810 | nvgpu_err(g, "Lookup failed for address 0x%x", addr); | 8065 | nvgpu_err(g, "Lookup failed for address 0x%x", addr); |
7811 | err = -EINVAL; | 8066 | err = -EINVAL; |
7812 | } | 8067 | } |
@@ -7827,8 +8082,9 @@ bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch) | |||
7827 | * valid bit must be checked to be absolutely certain that a | 8082 | * valid bit must be checked to be absolutely certain that a |
7828 | * valid context is currently resident. | 8083 | * valid context is currently resident. |
7829 | */ | 8084 | */ |
7830 | if (!gr_fecs_current_ctx_valid_v(curr_gr_ctx)) | 8085 | if (!gr_fecs_current_ctx_valid_v(curr_gr_ctx)) { |
7831 | return NULL; | 8086 | return NULL; |
8087 | } | ||
7832 | 8088 | ||
7833 | curr_ch = gk20a_gr_get_channel_from_ctx(g, curr_gr_ctx, | 8089 | curr_ch = gk20a_gr_get_channel_from_ctx(g, curr_gr_ctx, |
7834 | &curr_gr_tsgid); | 8090 | &curr_gr_tsgid); |
@@ -7841,14 +8097,17 @@ bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch) | |||
7841 | ch->tsgid, | 8097 | ch->tsgid, |
7842 | ch->chid); | 8098 | ch->chid); |
7843 | 8099 | ||
7844 | if (!curr_ch) | 8100 | if (!curr_ch) { |
7845 | return false; | 8101 | return false; |
8102 | } | ||
7846 | 8103 | ||
7847 | if (ch->chid == curr_ch->chid) | 8104 | if (ch->chid == curr_ch->chid) { |
7848 | ret = true; | 8105 | ret = true; |
8106 | } | ||
7849 | 8107 | ||
7850 | if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid)) | 8108 | if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid)) { |
7851 | ret = true; | 8109 | ret = true; |
8110 | } | ||
7852 | 8111 | ||
7853 | gk20a_channel_put(curr_ch); | 8112 | gk20a_channel_put(curr_ch); |
7854 | return ret; | 8113 | return ret; |
@@ -7879,8 +8138,9 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7879 | num_ctx_wr_ops, num_ctx_rd_ops); | 8138 | num_ctx_wr_ops, num_ctx_rd_ops); |
7880 | 8139 | ||
7881 | tsg = tsg_gk20a_from_ch(ch); | 8140 | tsg = tsg_gk20a_from_ch(ch); |
7882 | if (!tsg) | 8141 | if (!tsg) { |
7883 | return -EINVAL; | 8142 | return -EINVAL; |
8143 | } | ||
7884 | 8144 | ||
7885 | gr_ctx = &tsg->gr_ctx; | 8145 | gr_ctx = &tsg->gr_ctx; |
7886 | 8146 | ||
@@ -7891,15 +8151,17 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7891 | /* only do ctx ops and only on the right pass */ | 8151 | /* only do ctx ops and only on the right pass */ |
7892 | if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) || | 8152 | if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) || |
7893 | (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) || | 8153 | (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) || |
7894 | ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) | 8154 | ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) { |
7895 | continue; | 8155 | continue; |
8156 | } | ||
7896 | 8157 | ||
7897 | /* if this is a quad access, setup for special access*/ | 8158 | /* if this is a quad access, setup for special access*/ |
7898 | if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) | 8159 | if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) |
7899 | && g->ops.gr.access_smpc_reg) | 8160 | && g->ops.gr.access_smpc_reg) { |
7900 | g->ops.gr.access_smpc_reg(g, | 8161 | g->ops.gr.access_smpc_reg(g, |
7901 | ctx_ops[i].quad, | 8162 | ctx_ops[i].quad, |
7902 | ctx_ops[i].offset); | 8163 | ctx_ops[i].offset); |
8164 | } | ||
7903 | offset = ctx_ops[i].offset; | 8165 | offset = ctx_ops[i].offset; |
7904 | 8166 | ||
7905 | if (pass == 0) { /* write pass */ | 8167 | if (pass == 0) { /* write pass */ |
@@ -7938,8 +8200,9 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7938 | nvgpu_log(g, gpu_dbg_gpu_dbg, | 8200 | nvgpu_log(g, gpu_dbg_gpu_dbg, |
7939 | "direct rd: offset=0x%x v=0x%x", | 8201 | "direct rd: offset=0x%x v=0x%x", |
7940 | offset, ctx_ops[i].value_lo); | 8202 | offset, ctx_ops[i].value_lo); |
7941 | } else | 8203 | } else { |
7942 | ctx_ops[i].value_hi = 0; | 8204 | ctx_ops[i].value_hi = 0; |
8205 | } | ||
7943 | } | 8206 | } |
7944 | ctx_op_nr++; | 8207 | ctx_op_nr++; |
7945 | } | 8208 | } |
@@ -7956,8 +8219,9 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7956 | offset_addrs = offsets + max_offsets; | 8219 | offset_addrs = offsets + max_offsets; |
7957 | 8220 | ||
7958 | err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); | 8221 | err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); |
7959 | if (err) | 8222 | if (err) { |
7960 | goto cleanup; | 8223 | goto cleanup; |
8224 | } | ||
7961 | 8225 | ||
7962 | g->ops.mm.l2_flush(g, true); | 8226 | g->ops.mm.l2_flush(g, true); |
7963 | 8227 | ||
@@ -7973,8 +8237,9 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7973 | /* only do ctx ops and only on the right pass */ | 8237 | /* only do ctx ops and only on the right pass */ |
7974 | if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) || | 8238 | if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) || |
7975 | (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) || | 8239 | (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) || |
7976 | ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) | 8240 | ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) { |
7977 | continue; | 8241 | continue; |
8242 | } | ||
7978 | 8243 | ||
7979 | err = gr_gk20a_get_ctx_buffer_offsets(g, | 8244 | err = gr_gk20a_get_ctx_buffer_offsets(g, |
7980 | ctx_ops[i].offset, | 8245 | ctx_ops[i].offset, |
@@ -7984,8 +8249,9 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7984 | ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), | 8249 | ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), |
7985 | ctx_ops[i].quad); | 8250 | ctx_ops[i].quad); |
7986 | if (!err) { | 8251 | if (!err) { |
7987 | if (!gr_ctx_ready) | 8252 | if (!gr_ctx_ready) { |
7988 | gr_ctx_ready = true; | 8253 | gr_ctx_ready = true; |
8254 | } | ||
7989 | current_mem = &gr_ctx->mem; | 8255 | current_mem = &gr_ctx->mem; |
7990 | } else { | 8256 | } else { |
7991 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, | 8257 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, |
@@ -8016,17 +8282,19 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
8016 | 8282 | ||
8017 | /* if this is a quad access, setup for special access*/ | 8283 | /* if this is a quad access, setup for special access*/ |
8018 | if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) && | 8284 | if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) && |
8019 | g->ops.gr.access_smpc_reg) | 8285 | g->ops.gr.access_smpc_reg) { |
8020 | g->ops.gr.access_smpc_reg(g, ctx_ops[i].quad, | 8286 | g->ops.gr.access_smpc_reg(g, ctx_ops[i].quad, |
8021 | ctx_ops[i].offset); | 8287 | ctx_ops[i].offset); |
8288 | } | ||
8022 | 8289 | ||
8023 | for (j = 0; j < num_offsets; j++) { | 8290 | for (j = 0; j < num_offsets; j++) { |
8024 | /* sanity check gr ctxt offsets, | 8291 | /* sanity check gr ctxt offsets, |
8025 | * don't write outside, worst case | 8292 | * don't write outside, worst case |
8026 | */ | 8293 | */ |
8027 | if ((current_mem == &gr_ctx->mem) && | 8294 | if ((current_mem == &gr_ctx->mem) && |
8028 | (offsets[j] >= g->gr.ctx_vars.golden_image_size)) | 8295 | (offsets[j] >= g->gr.ctx_vars.golden_image_size)) { |
8029 | continue; | 8296 | continue; |
8297 | } | ||
8030 | if (pass == 0) { /* write pass */ | 8298 | if (pass == 0) { /* write pass */ |
8031 | v = nvgpu_mem_rd(g, current_mem, offsets[j]); | 8299 | v = nvgpu_mem_rd(g, current_mem, offsets[j]); |
8032 | v &= ~ctx_ops[i].and_n_mask_lo; | 8300 | v &= ~ctx_ops[i].and_n_mask_lo; |
@@ -8067,8 +8335,9 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
8067 | nvgpu_log(g, gpu_dbg_gpu_dbg, | 8335 | nvgpu_log(g, gpu_dbg_gpu_dbg, |
8068 | "context rd: offset=0x%x v=0x%x", | 8336 | "context rd: offset=0x%x v=0x%x", |
8069 | offsets[0] + 4, ctx_ops[i].value_hi); | 8337 | offsets[0] + 4, ctx_ops[i].value_hi); |
8070 | } else | 8338 | } else { |
8071 | ctx_ops[i].value_hi = 0; | 8339 | ctx_ops[i].value_hi = 0; |
8340 | } | ||
8072 | } | 8341 | } |
8073 | } | 8342 | } |
8074 | ctx_op_nr++; | 8343 | ctx_op_nr++; |
@@ -8076,11 +8345,13 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
8076 | } | 8345 | } |
8077 | 8346 | ||
8078 | cleanup: | 8347 | cleanup: |
8079 | if (offsets) | 8348 | if (offsets) { |
8080 | nvgpu_kfree(g, offsets); | 8349 | nvgpu_kfree(g, offsets); |
8350 | } | ||
8081 | 8351 | ||
8082 | if (gr_ctx->patch_ctx.mem.cpu_va) | 8352 | if (gr_ctx->patch_ctx.mem.cpu_va) { |
8083 | gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready); | 8353 | gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready); |
8354 | } | ||
8084 | 8355 | ||
8085 | return err; | 8356 | return err; |
8086 | } | 8357 | } |
@@ -8382,14 +8653,16 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g, | |||
8382 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | 8653 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
8383 | 8654 | ||
8384 | ops = nvgpu_kcalloc(g, g->gr.no_of_sm, sizeof(*ops)); | 8655 | ops = nvgpu_kcalloc(g, g->gr.no_of_sm, sizeof(*ops)); |
8385 | if (!ops) | 8656 | if (!ops) { |
8386 | return -ENOMEM; | 8657 | return -ENOMEM; |
8658 | } | ||
8387 | for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { | 8659 | for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { |
8388 | int gpc, tpc; | 8660 | int gpc, tpc; |
8389 | u32 tpc_offset, gpc_offset, reg_offset, reg_mask, reg_val; | 8661 | u32 tpc_offset, gpc_offset, reg_offset, reg_mask, reg_val; |
8390 | 8662 | ||
8391 | if (!(sms & (1 << sm_id))) | 8663 | if (!(sms & (1 << sm_id))) { |
8392 | continue; | 8664 | continue; |
8665 | } | ||
8393 | 8666 | ||
8394 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | 8667 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; |
8395 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | 8668 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; |
@@ -8422,8 +8695,9 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g, | |||
8422 | } | 8695 | } |
8423 | 8696 | ||
8424 | err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0); | 8697 | err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0); |
8425 | if (err) | 8698 | if (err) { |
8426 | nvgpu_err(g, "Failed to access register"); | 8699 | nvgpu_err(g, "Failed to access register"); |
8700 | } | ||
8427 | nvgpu_kfree(g, ops); | 8701 | nvgpu_kfree(g, ops); |
8428 | return err; | 8702 | return err; |
8429 | } | 8703 | } |
@@ -8490,15 +8764,17 @@ int gr_gk20a_suspend_contexts(struct gk20a *g, | |||
8490 | ch = g->fifo.channel + ch_data->chid; | 8764 | ch = g->fifo.channel + ch_data->chid; |
8491 | 8765 | ||
8492 | ctx_resident = gr_gk20a_suspend_context(ch); | 8766 | ctx_resident = gr_gk20a_suspend_context(ch); |
8493 | if (ctx_resident) | 8767 | if (ctx_resident) { |
8494 | local_ctx_resident_ch_fd = ch_data->channel_fd; | 8768 | local_ctx_resident_ch_fd = ch_data->channel_fd; |
8769 | } | ||
8495 | } | 8770 | } |
8496 | 8771 | ||
8497 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | 8772 | nvgpu_mutex_release(&dbg_s->ch_list_lock); |
8498 | 8773 | ||
8499 | err = gr_gk20a_enable_ctxsw(g); | 8774 | err = gr_gk20a_enable_ctxsw(g); |
8500 | if (err) | 8775 | if (err) { |
8501 | nvgpu_err(g, "unable to restart ctxsw!"); | 8776 | nvgpu_err(g, "unable to restart ctxsw!"); |
8777 | } | ||
8502 | 8778 | ||
8503 | *ctx_resident_ch_fd = local_ctx_resident_ch_fd; | 8779 | *ctx_resident_ch_fd = local_ctx_resident_ch_fd; |
8504 | 8780 | ||
@@ -8531,13 +8807,15 @@ int gr_gk20a_resume_contexts(struct gk20a *g, | |||
8531 | ch = g->fifo.channel + ch_data->chid; | 8807 | ch = g->fifo.channel + ch_data->chid; |
8532 | 8808 | ||
8533 | ctx_resident = gr_gk20a_resume_context(ch); | 8809 | ctx_resident = gr_gk20a_resume_context(ch); |
8534 | if (ctx_resident) | 8810 | if (ctx_resident) { |
8535 | local_ctx_resident_ch_fd = ch_data->channel_fd; | 8811 | local_ctx_resident_ch_fd = ch_data->channel_fd; |
8812 | } | ||
8536 | } | 8813 | } |
8537 | 8814 | ||
8538 | err = gr_gk20a_enable_ctxsw(g); | 8815 | err = gr_gk20a_enable_ctxsw(g); |
8539 | if (err) | 8816 | if (err) { |
8540 | nvgpu_err(g, "unable to restart ctxsw!"); | 8817 | nvgpu_err(g, "unable to restart ctxsw!"); |
8818 | } | ||
8541 | 8819 | ||
8542 | *ctx_resident_ch_fd = local_ctx_resident_ch_fd; | 8820 | *ctx_resident_ch_fd = local_ctx_resident_ch_fd; |
8543 | 8821 | ||