author     Terje Bergstrom <tbergstrom@nvidia.com>              2017-12-15 12:04:15 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-01-17 15:29:09 -0500
commit     2f6698b863c9cc1db6455637b7c72e812b470b93 (patch)
tree       d0c8abf32d6994b9f54bf5eddafd8316e038c829 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent     6a73114788ffafe4c53771c707ecbd9c9ea0a117 (diff)
gpu: nvgpu: Make graphics context property of TSG
Move graphics context ownership to TSG instead of channel. Combine
channel_ctx_gk20a and gr_ctx_desc into one structure, because the split
between them was arbitrary. Move the context header to be a property of
the channel.

Bug 1842197

Change-Id: I410e3262f80b318d8528bcbec270b63a2d8d2ff9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1639532
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c  641
1 file changed, 320 insertions(+), 321 deletions(-)
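The heart of the change is the accessor pattern that replaces the old c->ch_ctx
dereferences throughout the diff below: code that used to read the context straight
out of the channel now resolves the owning TSG via tsg_gk20a_from_ch() and takes the
context from tsg->gr_ctx, returning -EINVAL for a channel that is not bound to a TSG.
The following sketch is a minimal, self-contained illustration of that pattern; the
struct layouts and the gr_ctx_get_size() helper are simplified stand-ins for
illustration only, not the real nvgpu definitions.

/*
 * Minimal sketch of the accessor pattern introduced by this patch: the
 * graphics context lives in the TSG, and channel code resolves it via
 * tsg_gk20a_from_ch() instead of dereferencing a per-channel ch_ctx.
 * Struct layouts are simplified stand-ins, not the real nvgpu types.
 */
#include <errno.h>
#include <stddef.h>
#include <stdio.h>

struct nvgpu_mem {
	unsigned long long gpu_va;	/* 0 means "not mapped yet" */
	size_t size;
};

struct nvgpu_gr_ctx {			/* was: channel_ctx_gk20a + gr_ctx_desc */
	struct nvgpu_mem mem;
	unsigned int tsgid;
};

struct tsg_gk20a {			/* the TSG now owns the graphics context */
	struct nvgpu_gr_ctx gr_ctx;
	unsigned int tsgid;
};

struct channel_gk20a {			/* the channel only points back at its TSG */
	struct tsg_gk20a *tsg;
};

/* Stand-in for tsg_gk20a_from_ch(): NULL if the channel is not bound to a TSG. */
static struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *c)
{
	return c->tsg;
}

/* Pattern used throughout the patch: resolve the TSG first, then use tsg->gr_ctx. */
static int gr_ctx_get_size(struct channel_gk20a *c, size_t *size)
{
	struct tsg_gk20a *tsg;
	struct nvgpu_gr_ctx *gr_ctx;

	tsg = tsg_gk20a_from_ch(c);
	if (!tsg)
		return -EINVAL;		/* unbound channel: reject, as the patch does */

	gr_ctx = &tsg->gr_ctx;
	*size = gr_ctx->mem.size;
	return 0;
}

int main(void)
{
	struct tsg_gk20a tsg = {
		.gr_ctx = { .mem = { .gpu_va = 0x1000, .size = 4096 } },
		.tsgid = 1,
	};
	struct channel_gk20a ch = { .tsg = &tsg };
	size_t size;

	if (gr_ctx_get_size(&ch, &size) == 0)
		printf("gr ctx size: %zu\n", size);
	return 0;
}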
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 263ae030..f8af091b 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -85,18 +85,19 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
85static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g); 85static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g);
86static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, 86static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
87 struct channel_gk20a *c); 87 struct channel_gk20a *c);
88static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c); 88static void gr_gk20a_unmap_global_ctx_buffers(struct gk20a *g,
89 89 struct vm_gk20a *vm,
90/* channel gr ctx buffer */ 90 struct nvgpu_gr_ctx *gr_ctx);
91static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, 91static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g,
92 struct channel_gk20a *c, 92 struct vm_gk20a *vm,
93 u32 class, u32 padding); 93 struct nvgpu_gr_ctx *gr_ctx);
94static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c);
95 94
96/* channel patch ctx buffer */ 95/* channel patch ctx buffer */
97static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, 96static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
98 struct channel_gk20a *c); 97 struct channel_gk20a *c);
99static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c); 98static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g,
99 struct vm_gk20a *vm,
100 struct nvgpu_gr_ctx *gr_ctx);
100 101
101/* golden ctx image */ 102/* golden ctx image */
102static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, 103static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
@@ -108,8 +109,16 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
108 struct channel_gk20a *c, 109 struct channel_gk20a *c,
109 u32 *ctx_id) 110 u32 *ctx_id)
110{ 111{
111 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 112 struct tsg_gk20a *tsg;
112 struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; 113 struct nvgpu_gr_ctx *gr_ctx = NULL;
114 struct nvgpu_mem *mem = NULL;
115
116 tsg = tsg_gk20a_from_ch(c);
117 if (!tsg)
118 return -EINVAL;
119
120 gr_ctx = &tsg->gr_ctx;
121 mem = &gr_ctx->mem;
113 122
114 /* Channel gr_ctx buffer is gpu cacheable. 123 /* Channel gr_ctx buffer is gpu cacheable.
115 Flush and invalidate before cpu update. */ 124 Flush and invalidate before cpu update. */
@@ -671,62 +680,62 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
671 */ 680 */
672 681
673int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, 682int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
674 struct channel_ctx_gk20a *ch_ctx, 683 struct nvgpu_gr_ctx *gr_ctx,
675 bool update_patch_count) 684 bool update_patch_count)
676{ 685{
677 int err = 0; 686 int err = 0;
678 687
679 err = nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem); 688 err = nvgpu_mem_begin(g, &gr_ctx->patch_ctx.mem);
680 if (err) 689 if (err)
681 return err; 690 return err;
682 691
683 if (update_patch_count) { 692 if (update_patch_count) {
684 /* reset patch count if ucode has already processed it */ 693 /* reset patch count if ucode has already processed it */
685 ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g, 694 gr_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
686 &ch_ctx->gr_ctx->mem, 695 &gr_ctx->mem,
687 ctxsw_prog_main_image_patch_count_o()); 696 ctxsw_prog_main_image_patch_count_o());
688 nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", 697 nvgpu_log(g, gpu_dbg_info, "patch count reset to %d",
689 ch_ctx->patch_ctx.data_count); 698 gr_ctx->patch_ctx.data_count);
690 } 699 }
691 return 0; 700 return 0;
692} 701}
693 702
694void gr_gk20a_ctx_patch_write_end(struct gk20a *g, 703void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
695 struct channel_ctx_gk20a *ch_ctx, 704 struct nvgpu_gr_ctx *gr_ctx,
696 bool update_patch_count) 705 bool update_patch_count)
697{ 706{
698 nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem); 707 nvgpu_mem_end(g, &gr_ctx->patch_ctx.mem);
699 708
700 /* Write context count to context image if it is mapped */ 709 /* Write context count to context image if it is mapped */
701 if (update_patch_count) { 710 if (update_patch_count) {
702 nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem, 711 nvgpu_mem_wr(g, &gr_ctx->mem,
703 ctxsw_prog_main_image_patch_count_o(), 712 ctxsw_prog_main_image_patch_count_o(),
704 ch_ctx->patch_ctx.data_count); 713 gr_ctx->patch_ctx.data_count);
705 nvgpu_log(g, gpu_dbg_info, "write patch count %d", 714 nvgpu_log(g, gpu_dbg_info, "write patch count %d",
706 ch_ctx->patch_ctx.data_count); 715 gr_ctx->patch_ctx.data_count);
707 } 716 }
708} 717}
709 718
710void gr_gk20a_ctx_patch_write(struct gk20a *g, 719void gr_gk20a_ctx_patch_write(struct gk20a *g,
711 struct channel_ctx_gk20a *ch_ctx, 720 struct nvgpu_gr_ctx *gr_ctx,
712 u32 addr, u32 data, bool patch) 721 u32 addr, u32 data, bool patch)
713{ 722{
714 if (patch) { 723 if (patch) {
715 u32 patch_slot = ch_ctx->patch_ctx.data_count * 724 u32 patch_slot = gr_ctx->patch_ctx.data_count *
716 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; 725 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
717 if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE( 726 if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE(
718 ch_ctx->patch_ctx.mem.size) - 727 gr_ctx->patch_ctx.mem.size) -
719 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) { 728 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) {
720 nvgpu_err(g, "failed to access patch_slot %d", 729 nvgpu_err(g, "failed to access patch_slot %d",
721 patch_slot); 730 patch_slot);
722 return; 731 return;
723 } 732 }
724 nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot, addr); 733 nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot, addr);
725 nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot + 1, data); 734 nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot + 1, data);
726 ch_ctx->patch_ctx.data_count++; 735 gr_ctx->patch_ctx.data_count++;
727 nvgpu_log(g, gpu_dbg_info, 736 nvgpu_log(g, gpu_dbg_info,
728 "patch addr = 0x%x data = 0x%x data_count %d", 737 "patch addr = 0x%x data = 0x%x data_count %d",
729 addr, data, ch_ctx->patch_ctx.data_count); 738 addr, data, gr_ctx->patch_ctx.data_count);
730 } else { 739 } else {
731 gk20a_writel(g, addr, data); 740 gk20a_writel(g, addr, data);
732 } 741 }
@@ -793,14 +802,22 @@ void gr_gk20a_write_pm_ptr(struct gk20a *g,
793 802
794static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) 803static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
795{ 804{
796 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 805 struct tsg_gk20a *tsg;
797 struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; 806 struct nvgpu_gr_ctx *gr_ctx = NULL;
798 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; 807 struct nvgpu_mem *mem = NULL;
808 struct ctx_header_desc *ctx = &c->ctx_header;
799 struct nvgpu_mem *ctxheader = &ctx->mem; 809 struct nvgpu_mem *ctxheader = &ctx->mem;
800 int ret = 0; 810 int ret = 0;
801 811
802 gk20a_dbg_fn(""); 812 gk20a_dbg_fn("");
803 813
814 tsg = tsg_gk20a_from_ch(c);
815 if (!tsg)
816 return -EINVAL;
817
818 gr_ctx = &tsg->gr_ctx;
819 mem = &gr_ctx->mem;
820
804 if (nvgpu_mem_begin(g, mem)) 821 if (nvgpu_mem_begin(g, mem))
805 return -ENOMEM; 822 return -ENOMEM;
806 823
@@ -809,8 +826,8 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
809 goto clean_up_mem; 826 goto clean_up_mem;
810 } 827 }
811 828
812 if (ch_ctx->zcull_ctx.gpu_va == 0 && 829 if (gr_ctx->zcull_ctx.gpu_va == 0 &&
813 ch_ctx->zcull_ctx.ctx_sw_mode == 830 gr_ctx->zcull_ctx.ctx_sw_mode ==
814 ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) { 831 ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
815 ret = -EINVAL; 832 ret = -EINVAL;
816 goto clean_up; 833 goto clean_up;
@@ -830,13 +847,13 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
830 847
831 nvgpu_mem_wr(g, mem, 848 nvgpu_mem_wr(g, mem,
832 ctxsw_prog_main_image_zcull_o(), 849 ctxsw_prog_main_image_zcull_o(),
833 ch_ctx->zcull_ctx.ctx_sw_mode); 850 gr_ctx->zcull_ctx.ctx_sw_mode);
834 851
835 if (ctxheader->gpu_va) 852 if (ctxheader->gpu_va)
836 g->ops.gr.write_zcull_ptr(g, ctxheader, 853 g->ops.gr.write_zcull_ptr(g, ctxheader,
837 ch_ctx->zcull_ctx.gpu_va); 854 gr_ctx->zcull_ctx.gpu_va);
838 else 855 else
839 g->ops.gr.write_zcull_ptr(g, mem, ch_ctx->zcull_ctx.gpu_va); 856 g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va);
840 857
841 gk20a_enable_channel_tsg(g, c); 858 gk20a_enable_channel_tsg(g, c);
842 859
@@ -869,22 +886,29 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
869 struct channel_gk20a *c, bool patch) 886 struct channel_gk20a *c, bool patch)
870{ 887{
871 struct gr_gk20a *gr = &g->gr; 888 struct gr_gk20a *gr = &g->gr;
872 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 889 struct tsg_gk20a *tsg;
890 struct nvgpu_gr_ctx *gr_ctx = NULL;
873 u64 addr; 891 u64 addr;
874 u32 size; 892 u32 size;
875 893
876 gk20a_dbg_fn(""); 894 gk20a_dbg_fn("");
895
896 tsg = tsg_gk20a_from_ch(c);
897 if (!tsg)
898 return -EINVAL;
899
900 gr_ctx = &tsg->gr_ctx;
877 if (patch) { 901 if (patch) {
878 int err; 902 int err;
879 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false); 903 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
880 if (err) 904 if (err)
881 return err; 905 return err;
882 } 906 }
883 907
884 /* global pagepool buffer */ 908 /* global pagepool buffer */
885 addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >> 909 addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >>
886 gr_scc_pagepool_base_addr_39_8_align_bits_v()) | 910 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
887 (u64_hi32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) << 911 (u64_hi32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) <<
888 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v())); 912 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
889 913
890 size = gr->global_ctx_buffer[PAGEPOOL].mem.size / 914 size = gr->global_ctx_buffer[PAGEPOOL].mem.size /
@@ -896,12 +920,12 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
896 gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d", 920 gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d",
897 addr, size); 921 addr, size);
898 922
899 g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, patch); 923 g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, patch);
900 924
901 /* global bundle cb */ 925 /* global bundle cb */
902 addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >> 926 addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >>
903 gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) | 927 gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) |
904 (u64_hi32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) << 928 (u64_hi32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) <<
905 (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v())); 929 (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v()));
906 930
907 size = gr->bundle_cb_default_size; 931 size = gr->bundle_cb_default_size;
@@ -909,20 +933,20 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
909 gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d", 933 gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d",
910 addr, size); 934 addr, size);
911 935
912 g->ops.gr.commit_global_bundle_cb(g, ch_ctx, addr, size, patch); 936 g->ops.gr.commit_global_bundle_cb(g, gr_ctx, addr, size, patch);
913 937
914 /* global attrib cb */ 938 /* global attrib cb */
915 addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >> 939 addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >>
916 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) | 940 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
917 (u64_hi32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) << 941 (u64_hi32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) <<
918 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); 942 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
919 943
920 gk20a_dbg_info("attrib cb addr : 0x%016llx", addr); 944 gk20a_dbg_info("attrib cb addr : 0x%016llx", addr);
921 g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, patch); 945 g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, patch);
922 g->ops.gr.commit_global_cb_manager(g, c, patch); 946 g->ops.gr.commit_global_cb_manager(g, c, patch);
923 947
924 if (patch) 948 if (patch)
925 gr_gk20a_ctx_patch_write_end(g, ch_ctx, false); 949 gr_gk20a_ctx_patch_write_end(g, gr_ctx, false);
926 950
927 return 0; 951 return 0;
928} 952}
@@ -930,7 +954,7 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
930int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) 954int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
931{ 955{
932 struct gr_gk20a *gr = &g->gr; 956 struct gr_gk20a *gr = &g->gr;
933 struct channel_ctx_gk20a *ch_ctx = NULL; 957 struct nvgpu_gr_ctx *gr_ctx = NULL;
934 u32 gpm_pd_cfg; 958 u32 gpm_pd_cfg;
935 u32 pd_ab_dist_cfg0; 959 u32 pd_ab_dist_cfg0;
936 u32 ds_debug; 960 u32 ds_debug;
@@ -956,22 +980,22 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
956 ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug; 980 ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
957 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug; 981 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;
958 982
959 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); 983 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
960 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false); 984 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false);
961 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false); 985 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false);
962 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); 986 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
963 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); 987 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false);
964 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); 988 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
965 } else { 989 } else {
966 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg; 990 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
967 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0; 991 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
968 ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug; 992 ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
969 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug; 993 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;
970 994
971 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); 995 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
972 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); 996 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
973 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); 997 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false);
974 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); 998 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
975 } 999 }
976 1000
977 return 0; 1001 return 0;
@@ -1360,13 +1384,14 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1360 struct channel_gk20a *c) 1384 struct channel_gk20a *c)
1361{ 1385{
1362 struct gr_gk20a *gr = &g->gr; 1386 struct gr_gk20a *gr = &g->gr;
1363 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1387 struct tsg_gk20a *tsg;
1388 struct nvgpu_gr_ctx *gr_ctx = NULL;
1364 u32 ctx_header_bytes = ctxsw_prog_fecs_header_v(); 1389 u32 ctx_header_bytes = ctxsw_prog_fecs_header_v();
1365 u32 ctx_header_words; 1390 u32 ctx_header_words;
1366 u32 i; 1391 u32 i;
1367 u32 data; 1392 u32 data;
1368 struct nvgpu_mem *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; 1393 struct nvgpu_mem *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
1369 struct nvgpu_mem *gr_mem = &ch_ctx->gr_ctx->mem; 1394 struct nvgpu_mem *gr_mem;
1370 u32 err = 0; 1395 u32 err = 0;
1371 struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load; 1396 struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
1372 struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init; 1397 struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
@@ -1374,6 +1399,13 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1374 1399
1375 gk20a_dbg_fn(""); 1400 gk20a_dbg_fn("");
1376 1401
1402 tsg = tsg_gk20a_from_ch(c);
1403 if (!tsg)
1404 return -EINVAL;
1405
1406 gr_ctx = &tsg->gr_ctx;
1407 gr_mem = &gr_ctx->mem;
1408
1377 /* golden ctx is global to all channels. Although only the first 1409 /* golden ctx is global to all channels. Although only the first
1378 channel initializes golden image, driver needs to prevent multiple 1410 channel initializes golden image, driver needs to prevent multiple
1379 channels from initializing golden ctx at the same time */ 1411 channels from initializing golden ctx at the same time */
@@ -1565,7 +1597,7 @@ restore_fe_go_idle:
1565 1597
1566 g->ops.gr.write_zcull_ptr(g, gold_mem, 0); 1598 g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
1567 1599
1568 err = g->ops.gr.commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); 1600 err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
1569 if (err) 1601 if (err)
1570 goto clean_up; 1602 goto clean_up;
1571 1603
@@ -1614,20 +1646,25 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1614 struct channel_gk20a *c, 1646 struct channel_gk20a *c,
1615 bool enable_smpc_ctxsw) 1647 bool enable_smpc_ctxsw)
1616{ 1648{
1617 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1649 struct tsg_gk20a *tsg;
1618 struct nvgpu_mem *mem; 1650 struct nvgpu_gr_ctx *gr_ctx = NULL;
1651 struct nvgpu_mem *mem = NULL;
1619 u32 data; 1652 u32 data;
1620 int ret; 1653 int ret;
1621 1654
1622 gk20a_dbg_fn(""); 1655 gk20a_dbg_fn("");
1623 1656
1624 if (!ch_ctx->gr_ctx) { 1657 tsg = tsg_gk20a_from_ch(c);
1658 if (!tsg)
1659 return -EINVAL;
1660
1661 gr_ctx = &tsg->gr_ctx;
1662 mem = &gr_ctx->mem;
1663 if (!nvgpu_mem_is_valid(mem)) {
1625 nvgpu_err(g, "no graphics context allocated"); 1664 nvgpu_err(g, "no graphics context allocated");
1626 return -EFAULT; 1665 return -EFAULT;
1627 } 1666 }
1628 1667
1629 mem = &ch_ctx->gr_ctx->mem;
1630
1631 ret = gk20a_disable_channel_tsg(g, c); 1668 ret = gk20a_disable_channel_tsg(g, c);
1632 if (ret) { 1669 if (ret) {
1633 nvgpu_err(g, "failed to disable channel/TSG"); 1670 nvgpu_err(g, "failed to disable channel/TSG");
@@ -1670,24 +1707,30 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1670 struct channel_gk20a *c, 1707 struct channel_gk20a *c,
1671 bool enable_hwpm_ctxsw) 1708 bool enable_hwpm_ctxsw)
1672{ 1709{
1673 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1710 struct tsg_gk20a *tsg;
1674 struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; 1711 struct nvgpu_mem *gr_mem = NULL;
1675 struct nvgpu_mem *gr_mem; 1712 struct nvgpu_gr_ctx *gr_ctx;
1713 struct pm_ctx_desc *pm_ctx;
1676 u32 data; 1714 u32 data;
1677 u64 virt_addr; 1715 u64 virt_addr;
1678 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; 1716 struct ctx_header_desc *ctx = &c->ctx_header;
1679 struct nvgpu_mem *ctxheader = &ctx->mem; 1717 struct nvgpu_mem *ctxheader = &ctx->mem;
1680 int ret; 1718 int ret;
1681 1719
1682 gk20a_dbg_fn(""); 1720 gk20a_dbg_fn("");
1683 1721
1684 if (!ch_ctx->gr_ctx) { 1722 tsg = tsg_gk20a_from_ch(c);
1723 if (!tsg)
1724 return -EINVAL;
1725
1726 gr_ctx = &tsg->gr_ctx;
1727 pm_ctx = &gr_ctx->pm_ctx;
1728 gr_mem = &gr_ctx->mem;
1729 if (!nvgpu_mem_is_valid(gr_mem)) {
1685 nvgpu_err(g, "no graphics context allocated"); 1730 nvgpu_err(g, "no graphics context allocated");
1686 return -EFAULT; 1731 return -EFAULT;
1687 } 1732 }
1688 1733
1689 gr_mem = &ch_ctx->gr_ctx->mem;
1690
1691 if (enable_hwpm_ctxsw) { 1734 if (enable_hwpm_ctxsw) {
1692 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) 1735 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
1693 return 0; 1736 return 0;
@@ -1816,20 +1859,25 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1816 struct channel_gk20a *c) 1859 struct channel_gk20a *c)
1817{ 1860{
1818 struct gr_gk20a *gr = &g->gr; 1861 struct gr_gk20a *gr = &g->gr;
1819 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1862 struct tsg_gk20a *tsg;
1863 struct nvgpu_gr_ctx *gr_ctx;
1820 u32 virt_addr_lo; 1864 u32 virt_addr_lo;
1821 u32 virt_addr_hi; 1865 u32 virt_addr_hi;
1822 u64 virt_addr = 0; 1866 u64 virt_addr = 0;
1823 u32 v, data; 1867 u32 v, data;
1824 int ret = 0; 1868 int ret = 0;
1825 struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; 1869 struct nvgpu_mem *mem;
1826 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
1827 struct nvgpu_mem *ctxheader = &ctx->mem;
1828 1870
1829 gk20a_dbg_fn(""); 1871 gk20a_dbg_fn("");
1830 1872
1873 tsg = tsg_gk20a_from_ch(c);
1874 if (!tsg)
1875 return -EINVAL;
1876
1877 gr_ctx = &tsg->gr_ctx;
1878 mem = &gr_ctx->mem;
1831 if (gr->ctx_vars.local_golden_image == NULL) 1879 if (gr->ctx_vars.local_golden_image == NULL)
1832 return -1; 1880 return -EINVAL;
1833 1881
1834 /* Channel gr_ctx buffer is gpu cacheable. 1882 /* Channel gr_ctx buffer is gpu cacheable.
1835 Flush and invalidate before cpu update. */ 1883 Flush and invalidate before cpu update. */
@@ -1838,11 +1886,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1838 if (nvgpu_mem_begin(g, mem)) 1886 if (nvgpu_mem_begin(g, mem))
1839 return -ENOMEM; 1887 return -ENOMEM;
1840 1888
1841 if (nvgpu_mem_begin(g, ctxheader)) {
1842 ret = -ENOMEM;
1843 goto clean_up_mem;
1844 }
1845
1846 nvgpu_mem_wr_n(g, mem, 0, 1889 nvgpu_mem_wr_n(g, mem, 0,
1847 gr->ctx_vars.local_golden_image, 1890 gr->ctx_vars.local_golden_image,
1848 gr->ctx_vars.golden_image_size); 1891 gr->ctx_vars.golden_image_size);
@@ -1855,9 +1898,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1855 1898
1856 /* set priv access map */ 1899 /* set priv access map */
1857 virt_addr_lo = 1900 virt_addr_lo =
1858 u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); 1901 u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
1859 virt_addr_hi = 1902 virt_addr_hi =
1860 u64_hi32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); 1903 u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
1861 1904
1862 if (g->allow_all) 1905 if (g->allow_all)
1863 data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(); 1906 data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f();
@@ -1867,21 +1910,13 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1867 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(), 1910 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(),
1868 data); 1911 data);
1869 1912
1870 if (ctxheader->gpu_va) { 1913 nvgpu_mem_wr(g, mem,
1871 nvgpu_mem_wr(g, ctxheader, 1914 ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
1872 ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 1915 virt_addr_lo);
1873 virt_addr_lo); 1916 nvgpu_mem_wr(g, mem,
1874 nvgpu_mem_wr(g, ctxheader, 1917 ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
1875 ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 1918 virt_addr_hi);
1876 virt_addr_hi); 1919
1877 } else {
1878 nvgpu_mem_wr(g, mem,
1879 ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
1880 virt_addr_lo);
1881 nvgpu_mem_wr(g, mem,
1882 ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
1883 virt_addr_hi);
1884 }
1885 /* disable verif features */ 1920 /* disable verif features */
1886 v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o()); 1921 v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o());
1887 v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); 1922 v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
@@ -1889,65 +1924,50 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1889 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v); 1924 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v);
1890 1925
1891 if (g->ops.gr.update_ctxsw_preemption_mode) 1926 if (g->ops.gr.update_ctxsw_preemption_mode)
1892 g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem); 1927 g->ops.gr.update_ctxsw_preemption_mode(g, c, mem);
1893 1928
1894 if (g->ops.gr.update_boosted_ctx) 1929 if (g->ops.gr.update_boosted_ctx)
1895 g->ops.gr.update_boosted_ctx(g, mem, ch_ctx->gr_ctx); 1930 g->ops.gr.update_boosted_ctx(g, mem, gr_ctx);
1896 1931
1897 virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); 1932 virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
1898 virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); 1933 virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
1899 1934
1900 nvgpu_log(g, gpu_dbg_info, "write patch count = %d", 1935 nvgpu_log(g, gpu_dbg_info, "write patch count = %d",
1901 ch_ctx->patch_ctx.data_count); 1936 gr_ctx->patch_ctx.data_count);
1902 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), 1937 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
1903 ch_ctx->patch_ctx.data_count); 1938 gr_ctx->patch_ctx.data_count);
1904 1939
1905 if (ctxheader->gpu_va) { 1940 nvgpu_mem_wr(g, mem,
1906 nvgpu_mem_wr(g, ctxheader, 1941 ctxsw_prog_main_image_patch_adr_lo_o(),
1907 ctxsw_prog_main_image_patch_adr_lo_o(), 1942 virt_addr_lo);
1908 virt_addr_lo); 1943 nvgpu_mem_wr(g, mem,
1909 nvgpu_mem_wr(g, ctxheader, 1944 ctxsw_prog_main_image_patch_adr_hi_o(),
1910 ctxsw_prog_main_image_patch_adr_hi_o(), 1945 virt_addr_hi);
1911 virt_addr_hi);
1912 } else {
1913 nvgpu_mem_wr(g, mem,
1914 ctxsw_prog_main_image_patch_adr_lo_o(),
1915 virt_addr_lo);
1916 nvgpu_mem_wr(g, mem,
1917 ctxsw_prog_main_image_patch_adr_hi_o(),
1918 virt_addr_hi);
1919 }
1920 1946
1921 /* Update main header region of the context buffer with the info needed 1947 /* Update main header region of the context buffer with the info needed
1922 * for PM context switching, including mode and possibly a pointer to 1948 * for PM context switching, including mode and possibly a pointer to
1923 * the PM backing store. 1949 * the PM backing store.
1924 */ 1950 */
1925 if (ch_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) { 1951 if (gr_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
1926 if (ch_ctx->pm_ctx.mem.gpu_va == 0) { 1952 if (gr_ctx->pm_ctx.mem.gpu_va == 0) {
1927 nvgpu_err(g, 1953 nvgpu_err(g,
1928 "context switched pm with no pm buffer!"); 1954 "context switched pm with no pm buffer!");
1929 nvgpu_mem_end(g, mem); 1955 nvgpu_mem_end(g, mem);
1930 return -EFAULT; 1956 return -EFAULT;
1931 } 1957 }
1932 1958
1933 virt_addr = ch_ctx->pm_ctx.mem.gpu_va; 1959 virt_addr = gr_ctx->pm_ctx.mem.gpu_va;
1934 } else 1960 } else
1935 virt_addr = 0; 1961 virt_addr = 0;
1936 1962
1937 data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o()); 1963 data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
1938 data = data & ~ctxsw_prog_main_image_pm_mode_m(); 1964 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1939 data |= ch_ctx->pm_ctx.pm_mode; 1965 data |= gr_ctx->pm_ctx.pm_mode;
1940 1966
1941 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data); 1967 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
1942 1968
1943 if (ctxheader->gpu_va) 1969 g->ops.gr.write_pm_ptr(g, mem, virt_addr);
1944 g->ops.gr.write_pm_ptr(g, ctxheader, virt_addr);
1945 else
1946 g->ops.gr.write_pm_ptr(g, mem, virt_addr);
1947
1948 1970
1949 nvgpu_mem_end(g, ctxheader);
1950clean_up_mem:
1951 nvgpu_mem_end(g, mem); 1971 nvgpu_mem_end(g, mem);
1952 1972
1953 return ret; 1973 return ret;
@@ -2568,13 +2588,13 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
2568 return -ENOMEM; 2588 return -ENOMEM;
2569} 2589}
2570 2590
2571static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) 2591static void gr_gk20a_unmap_global_ctx_buffers(struct gk20a *g,
2592 struct vm_gk20a *vm,
2593 struct nvgpu_gr_ctx *gr_ctx)
2572{ 2594{
2573 struct vm_gk20a *ch_vm = c->vm; 2595 u64 *g_bfr_va = gr_ctx->global_ctx_buffer_va;
2574 struct gr_gk20a *gr = &c->g->gr; 2596 u64 *g_bfr_size = gr_ctx->global_ctx_buffer_size;
2575 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; 2597 int *g_bfr_index = gr_ctx->global_ctx_buffer_index;
2576 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
2577 int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index;
2578 u32 i; 2598 u32 i;
2579 2599
2580 gk20a_dbg_fn(""); 2600 gk20a_dbg_fn("");
@@ -2588,32 +2608,41 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
2588 * the correct struct nvgpu_mem to use. Handles the VPR 2608 * the correct struct nvgpu_mem to use. Handles the VPR
2589 * vs non-VPR difference in context images. 2609 * vs non-VPR difference in context images.
2590 */ 2610 */
2591 mem = &gr->global_ctx_buffer[g_bfr_index[i]].mem; 2611 mem = &g->gr.global_ctx_buffer[g_bfr_index[i]].mem;
2592 2612
2593 nvgpu_gmmu_unmap(ch_vm, mem, g_bfr_va[i]); 2613 nvgpu_gmmu_unmap(vm, mem, g_bfr_va[i]);
2594 } 2614 }
2595 } 2615 }
2596 2616
2597 memset(g_bfr_va, 0, sizeof(c->ch_ctx.global_ctx_buffer_va)); 2617 memset(g_bfr_va, 0, sizeof(gr_ctx->global_ctx_buffer_va));
2598 memset(g_bfr_size, 0, sizeof(c->ch_ctx.global_ctx_buffer_size)); 2618 memset(g_bfr_size, 0, sizeof(gr_ctx->global_ctx_buffer_size));
2599 memset(g_bfr_index, 0, sizeof(c->ch_ctx.global_ctx_buffer_index)); 2619 memset(g_bfr_index, 0, sizeof(gr_ctx->global_ctx_buffer_index));
2600 2620
2601 c->ch_ctx.global_ctx_buffer_mapped = false; 2621 gr_ctx->global_ctx_buffer_mapped = false;
2602} 2622}
2603 2623
2604static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, 2624static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2605 struct channel_gk20a *c) 2625 struct channel_gk20a *c)
2606{ 2626{
2627 struct tsg_gk20a *tsg;
2607 struct vm_gk20a *ch_vm = c->vm; 2628 struct vm_gk20a *ch_vm = c->vm;
2608 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; 2629 u64 *g_bfr_va;
2609 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; 2630 u64 *g_bfr_size;
2610 int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index; 2631 int *g_bfr_index;
2611 struct gr_gk20a *gr = &g->gr; 2632 struct gr_gk20a *gr = &g->gr;
2612 struct nvgpu_mem *mem; 2633 struct nvgpu_mem *mem;
2613 u64 gpu_va; 2634 u64 gpu_va;
2614 2635
2615 gk20a_dbg_fn(""); 2636 gk20a_dbg_fn("");
2616 2637
2638 tsg = tsg_gk20a_from_ch(c);
2639 if (!tsg)
2640 return -EINVAL;
2641
2642 g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
2643 g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
2644 g_bfr_index = tsg->gr_ctx.global_ctx_buffer_index;
2645
2617 /* Circular Buffer */ 2646 /* Circular Buffer */
2618 if (c->vpr && 2647 if (c->vpr &&
2619 nvgpu_mem_is_valid(&gr->global_ctx_buffer[CIRCULAR_VPR].mem)) { 2648 nvgpu_mem_is_valid(&gr->global_ctx_buffer[CIRCULAR_VPR].mem)) {
@@ -2688,21 +2717,20 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2688 g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size; 2717 g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size;
2689 g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; 2718 g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP;
2690 2719
2691 c->ch_ctx.global_ctx_buffer_mapped = true; 2720 tsg->gr_ctx.global_ctx_buffer_mapped = true;
2692 return 0; 2721 return 0;
2693 2722
2694clean_up: 2723clean_up:
2695 gr_gk20a_unmap_global_ctx_buffers(c); 2724 gr_gk20a_unmap_global_ctx_buffers(g, ch_vm, &tsg->gr_ctx);
2696 2725
2697 return -ENOMEM; 2726 return -ENOMEM;
2698} 2727}
2699 2728
2700int gr_gk20a_alloc_gr_ctx(struct gk20a *g, 2729int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
2701 struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, 2730 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
2702 u32 class, 2731 u32 class,
2703 u32 padding) 2732 u32 padding)
2704{ 2733{
2705 struct gr_ctx_desc *gr_ctx = NULL;
2706 struct gr_gk20a *gr = &g->gr; 2734 struct gr_gk20a *gr = &g->gr;
2707 int err = 0; 2735 int err = 0;
2708 2736
@@ -2715,15 +2743,11 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
2715 gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; 2743 gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
2716 gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; 2744 gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
2717 2745
2718 gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx));
2719 if (!gr_ctx)
2720 return -ENOMEM;
2721
2722 err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING, 2746 err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING,
2723 gr->ctx_vars.buffer_total_size, 2747 gr->ctx_vars.buffer_total_size,
2724 &gr_ctx->mem); 2748 &gr_ctx->mem);
2725 if (err) 2749 if (err)
2726 goto err_free_ctx; 2750 return err;
2727 2751
2728 gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm, 2752 gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm,
2729 &gr_ctx->mem, 2753 &gr_ctx->mem,
@@ -2734,15 +2758,10 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
2734 if (!gr_ctx->mem.gpu_va) 2758 if (!gr_ctx->mem.gpu_va)
2735 goto err_free_mem; 2759 goto err_free_mem;
2736 2760
2737 *__gr_ctx = gr_ctx;
2738
2739 return 0; 2761 return 0;
2740 2762
2741 err_free_mem: 2763 err_free_mem:
2742 nvgpu_dma_free(g, &gr_ctx->mem); 2764 nvgpu_dma_free(g, &gr_ctx->mem);
2743 err_free_ctx:
2744 nvgpu_kfree(g, gr_ctx);
2745 gr_ctx = NULL;
2746 2765
2747 return err; 2766 return err;
2748} 2767}
@@ -2750,7 +2769,7 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
2750static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, 2769static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
2751 struct tsg_gk20a *tsg, u32 class, u32 padding) 2770 struct tsg_gk20a *tsg, u32 class, u32 padding)
2752{ 2771{
2753 struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx; 2772 struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx;
2754 int err; 2773 int err;
2755 2774
2756 if (!tsg->vm) { 2775 if (!tsg->vm) {
@@ -2762,57 +2781,44 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
2762 if (err) 2781 if (err)
2763 return err; 2782 return err;
2764 2783
2765 return 0; 2784 gr_ctx->tsgid = tsg->tsgid;
2766}
2767
2768static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
2769 struct channel_gk20a *c,
2770 u32 class,
2771 u32 padding)
2772{
2773 struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx;
2774 int err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, c->vm, class, padding);
2775 if (err)
2776 return err;
2777 2785
2778 return 0; 2786 return 0;
2779} 2787}
2780 2788
2781void gr_gk20a_free_gr_ctx(struct gk20a *g, 2789void gr_gk20a_free_gr_ctx(struct gk20a *g,
2782 struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx) 2790 struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
2783{ 2791{
2784 gk20a_dbg_fn(""); 2792 gk20a_dbg_fn("");
2785 2793
2786 if (!gr_ctx || !gr_ctx->mem.gpu_va) 2794 if (gr_ctx->mem.gpu_va) {
2787 return; 2795 gr_gk20a_unmap_global_ctx_buffers(g, vm, gr_ctx);
2796 gr_gk20a_free_channel_patch_ctx(g, vm, gr_ctx);
2797 gr_gk20a_free_channel_pm_ctx(g, vm, gr_ctx);
2788 2798
2789 if (g->ops.gr.dump_ctxsw_stats && 2799 if (g->ops.gr.dump_ctxsw_stats &&
2790 g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) 2800 g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close)
2791 g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx); 2801 g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx);
2792 2802
2793 nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); 2803 nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
2794 nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); 2804 nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
2795 nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); 2805 nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
2796 nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); 2806 nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
2797 nvgpu_gmmu_unmap(vm, &gr_ctx->mem, gr_ctx->mem.gpu_va); 2807 nvgpu_dma_unmap_free(vm, &gr_ctx->mem);
2798 nvgpu_dma_free(g, &gr_ctx->mem); 2808
2799 nvgpu_kfree(g, gr_ctx); 2809 memset(gr_ctx, 0, sizeof(*gr_ctx));
2810 }
2800} 2811}
2801 2812
2802void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg) 2813void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg)
2803{ 2814{
2815 struct gk20a *g = tsg->g;
2816
2804 if (!tsg->vm) { 2817 if (!tsg->vm) {
2805 nvgpu_err(tsg->g, "No address space bound"); 2818 nvgpu_err(g, "No address space bound");
2806 return; 2819 return;
2807 } 2820 }
2808 tsg->g->ops.gr.free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx); 2821 tsg->g->ops.gr.free_gr_ctx(g, tsg->vm, &tsg->gr_ctx);
2809 tsg->tsg_gr_ctx = NULL;
2810}
2811
2812static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
2813{
2814 c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx);
2815 c->ch_ctx.gr_ctx = NULL;
2816} 2822}
2817 2823
2818u32 gr_gk20a_get_patch_slots(struct gk20a *g) 2824u32 gr_gk20a_get_patch_slots(struct gk20a *g)
@@ -2823,13 +2829,19 @@ u32 gr_gk20a_get_patch_slots(struct gk20a *g)
2823static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, 2829static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
2824 struct channel_gk20a *c) 2830 struct channel_gk20a *c)
2825{ 2831{
2826 struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; 2832 struct tsg_gk20a *tsg;
2833 struct patch_desc *patch_ctx;
2827 struct vm_gk20a *ch_vm = c->vm; 2834 struct vm_gk20a *ch_vm = c->vm;
2828 u32 alloc_size; 2835 u32 alloc_size;
2829 int err = 0; 2836 int err = 0;
2830 2837
2831 gk20a_dbg_fn(""); 2838 gk20a_dbg_fn("");
2832 2839
2840 tsg = tsg_gk20a_from_ch(c);
2841 if (!tsg)
2842 return -EINVAL;
2843
2844 patch_ctx = &tsg->gr_ctx.patch_ctx;
2833 alloc_size = g->ops.gr.get_patch_slots(g) * 2845 alloc_size = g->ops.gr.get_patch_slots(g) *
2834 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; 2846 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
2835 2847
@@ -2845,57 +2857,42 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
2845 return 0; 2857 return 0;
2846} 2858}
2847 2859
2848static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c) 2860static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g,
2861 struct vm_gk20a *vm,
2862 struct nvgpu_gr_ctx *gr_ctx)
2849{ 2863{
2850 struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; 2864 struct patch_desc *patch_ctx = &gr_ctx->patch_ctx;
2851 struct gk20a *g = c->g;
2852 2865
2853 gk20a_dbg_fn(""); 2866 gk20a_dbg_fn("");
2854 2867
2855 if (patch_ctx->mem.gpu_va) 2868 if (patch_ctx->mem.gpu_va)
2856 nvgpu_gmmu_unmap(c->vm, &patch_ctx->mem, 2869 nvgpu_gmmu_unmap(vm, &patch_ctx->mem,
2857 patch_ctx->mem.gpu_va); 2870 patch_ctx->mem.gpu_va);
2858 2871
2859 nvgpu_dma_free(g, &patch_ctx->mem); 2872 nvgpu_dma_free(g, &patch_ctx->mem);
2860 patch_ctx->data_count = 0; 2873 patch_ctx->data_count = 0;
2861} 2874}
2862 2875
2863static void gr_gk20a_free_channel_pm_ctx(struct channel_gk20a *c) 2876static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g,
2877 struct vm_gk20a *vm,
2878 struct nvgpu_gr_ctx *gr_ctx)
2864{ 2879{
2865 struct pm_ctx_desc *pm_ctx = &c->ch_ctx.pm_ctx; 2880 struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx;
2866 struct gk20a *g = c->g;
2867 2881
2868 gk20a_dbg_fn(""); 2882 gk20a_dbg_fn("");
2869 2883
2870 if (pm_ctx->mem.gpu_va) { 2884 if (pm_ctx->mem.gpu_va) {
2871 nvgpu_gmmu_unmap(c->vm, &pm_ctx->mem, pm_ctx->mem.gpu_va); 2885 nvgpu_gmmu_unmap(vm, &pm_ctx->mem, pm_ctx->mem.gpu_va);
2872 2886
2873 nvgpu_dma_free(g, &pm_ctx->mem); 2887 nvgpu_dma_free(g, &pm_ctx->mem);
2874 } 2888 }
2875} 2889}
2876 2890
2877void gk20a_free_channel_ctx(struct channel_gk20a *c, bool is_tsg)
2878{
2879 if(c->g->ops.fifo.free_channel_ctx_header)
2880 c->g->ops.fifo.free_channel_ctx_header(c);
2881 gr_gk20a_unmap_global_ctx_buffers(c);
2882 gr_gk20a_free_channel_patch_ctx(c);
2883 gr_gk20a_free_channel_pm_ctx(c);
2884 if (!is_tsg)
2885 gr_gk20a_free_channel_gr_ctx(c);
2886
2887 /* zcull_ctx */
2888
2889 memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
2890
2891 c->first_init = false;
2892}
2893
2894int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) 2891int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
2895{ 2892{
2896 struct gk20a *g = c->g; 2893 struct gk20a *g = c->g;
2897 struct fifo_gk20a *f = &g->fifo; 2894 struct fifo_gk20a *f = &g->fifo;
2898 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 2895 struct nvgpu_gr_ctx *gr_ctx;
2899 struct tsg_gk20a *tsg = NULL; 2896 struct tsg_gk20a *tsg = NULL;
2900 int err = 0; 2897 int err = 0;
2901 2898
@@ -2917,92 +2914,64 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
2917 } 2914 }
2918 c->obj_class = class_num; 2915 c->obj_class = class_num;
2919 2916
2920 if (gk20a_is_channel_marked_as_tsg(c)) 2917 if (!gk20a_is_channel_marked_as_tsg(c))
2921 tsg = &f->tsg[c->tsgid]; 2918 return -EINVAL;
2922 2919
2923 /* allocate gr ctx buffer */ 2920 tsg = &f->tsg[c->tsgid];
2924 if (!tsg) { 2921 gr_ctx = &tsg->gr_ctx;
2925 if (!ch_ctx->gr_ctx) { 2922
2926 err = gr_gk20a_alloc_channel_gr_ctx(g, c, 2923 if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
2927 class_num, 2924 tsg->vm = c->vm;
2928 flags); 2925 nvgpu_vm_get(tsg->vm);
2929 if (err) { 2926 err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg,
2930 nvgpu_err(g, 2927 class_num,
2931 "fail to allocate gr ctx buffer"); 2928 flags);
2932 goto out; 2929 if (err) {
2933 }
2934 } else {
2935 /*TBD: needs to be more subtle about which is
2936 * being allocated as some are allowed to be
2937 * allocated along same channel */
2938 nvgpu_err(g, 2930 nvgpu_err(g,
2939 "too many classes alloc'd on same channel"); 2931 "fail to allocate TSG gr ctx buffer");
2940 err = -EINVAL; 2932 nvgpu_vm_put(tsg->vm);
2933 tsg->vm = NULL;
2941 goto out; 2934 goto out;
2942 } 2935 }
2943 } else { 2936
2944 if (!tsg->tsg_gr_ctx) { 2937 /* allocate patch buffer */
2945 tsg->vm = c->vm; 2938 if (!nvgpu_mem_is_valid(&gr_ctx->patch_ctx.mem)) {
2946 nvgpu_vm_get(tsg->vm); 2939 gr_ctx->patch_ctx.data_count = 0;
2947 err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, 2940 err = gr_gk20a_alloc_channel_patch_ctx(g, c);
2948 class_num,
2949 flags);
2950 if (err) { 2941 if (err) {
2951 nvgpu_err(g, 2942 nvgpu_err(g,
2952 "fail to allocate TSG gr ctx buffer"); 2943 "fail to allocate patch buffer");
2953 nvgpu_vm_put(tsg->vm);
2954 tsg->vm = NULL;
2955 goto out; 2944 goto out;
2956 } 2945 }
2957 } 2946 }
2958 ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
2959 }
2960
2961 /* PM ctxt switch is off by default */
2962 ch_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
2963 2947
2964 /* commit gr ctx buffer */ 2948 /* map global buffer to channel gpu_va and commit */
2965 err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); 2949 err = gr_gk20a_map_global_ctx_buffers(g, c);
2966 if (err) {
2967 nvgpu_err(g,
2968 "fail to commit gr ctx buffer");
2969 goto out;
2970 }
2971
2972 /* allocate patch buffer */
2973 if (!nvgpu_mem_is_valid(&ch_ctx->patch_ctx.mem)) {
2974 ch_ctx->patch_ctx.data_count = 0;
2975 err = gr_gk20a_alloc_channel_patch_ctx(g, c);
2976 if (err) { 2950 if (err) {
2977 nvgpu_err(g, 2951 nvgpu_err(g,
2978 "fail to allocate patch buffer"); 2952 "fail to map global ctx buffer");
2979 goto out; 2953 goto out;
2980 } 2954 }
2981 } 2955 gr_gk20a_commit_global_ctx_buffers(g, c, true);
2982 2956
2983 /* map global buffer to channel gpu_va and commit */ 2957 /* commit gr ctx buffer */
2984 if (!ch_ctx->global_ctx_buffer_mapped) { 2958 err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
2985 err = gr_gk20a_map_global_ctx_buffers(g, c);
2986 if (err) { 2959 if (err) {
2987 nvgpu_err(g, 2960 nvgpu_err(g,
2988 "fail to map global ctx buffer"); 2961 "fail to commit gr ctx buffer");
2989 goto out; 2962 goto out;
2990 } 2963 }
2991 gr_gk20a_commit_global_ctx_buffers(g, c, true);
2992 }
2993 2964
2994 /* init golden image, ELPG enabled after this is done */ 2965 /* init golden image, ELPG enabled after this is done */
2995 err = gr_gk20a_init_golden_ctx_image(g, c); 2966 err = gr_gk20a_init_golden_ctx_image(g, c);
2996 if (err) { 2967 if (err) {
2997 nvgpu_err(g, 2968 nvgpu_err(g,
2998 "fail to init golden ctx image"); 2969 "fail to init golden ctx image");
2999 goto out; 2970 goto out;
3000 } 2971 }
3001 2972
3002 /* load golden image */ 2973 /* load golden image */
3003 if (!c->first_init) { 2974 gr_gk20a_load_golden_ctx_image(g, c);
3004 err = gr_gk20a_elpg_protected_call(g,
3005 gr_gk20a_load_golden_ctx_image(g, c));
3006 if (err) { 2975 if (err) {
3007 nvgpu_err(g, 2976 nvgpu_err(g,
3008 "fail to load golden ctx image"); 2977 "fail to load golden ctx image");
@@ -3016,11 +2985,21 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
3016 "fail to bind channel for ctxsw trace"); 2985 "fail to bind channel for ctxsw trace");
3017 } 2986 }
3018#endif 2987#endif
3019 c->first_init = true;
3020 }
3021 2988
3022 if (g->ops.gr.set_czf_bypass) 2989 if (g->ops.gr.set_czf_bypass)
3023 g->ops.gr.set_czf_bypass(g, c); 2990 g->ops.gr.set_czf_bypass(g, c);
2991
2992 /* PM ctxt switch is off by default */
2993 gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
2994 } else {
2995 /* commit gr ctx buffer */
2996 err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
2997 if (err) {
2998 nvgpu_err(g,
2999 "fail to commit gr ctx buffer");
3000 goto out;
3001 }
3002 }
3024 3003
3025 gk20a_dbg_fn("done"); 3004 gk20a_dbg_fn("done");
3026 return 0; 3005 return 0;
@@ -3553,8 +3532,14 @@ u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr)
3553int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, 3532int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
3554 struct channel_gk20a *c, u64 zcull_va, u32 mode) 3533 struct channel_gk20a *c, u64 zcull_va, u32 mode)
3555{ 3534{
3556 struct zcull_ctx_desc *zcull_ctx = &c->ch_ctx.zcull_ctx; 3535 struct tsg_gk20a *tsg;
3536 struct zcull_ctx_desc *zcull_ctx;
3557 3537
3538 tsg = tsg_gk20a_from_ch(c);
3539 if (!tsg)
3540 return -EINVAL;
3541
3542 zcull_ctx = &tsg->gr_ctx.zcull_ctx;
3558 zcull_ctx->ctx_sw_mode = mode; 3543 zcull_ctx->ctx_sw_mode = mode;
3559 zcull_ctx->gpu_va = zcull_va; 3544 zcull_ctx->gpu_va = zcull_va;
3560 3545
@@ -6516,7 +6501,7 @@ void gk20a_gr_init_ovr_sm_dsm_perf(void)
6516 * write will actually occur. so later we should put a lazy, 6501 * write will actually occur. so later we should put a lazy,
6517 * map-and-hold system in the patch write state */ 6502 * map-and-hold system in the patch write state */
6518static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, 6503static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6519 struct channel_ctx_gk20a *ch_ctx, 6504 struct channel_gk20a *ch,
6520 u32 addr, u32 data, 6505 u32 addr, u32 data,
6521 struct nvgpu_mem *mem) 6506 struct nvgpu_mem *mem)
6522{ 6507{
@@ -6531,9 +6516,16 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6531 u32 *ovr_perf_regs = NULL; 6516 u32 *ovr_perf_regs = NULL;
6532 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); 6517 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
6533 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); 6518 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
6534 struct ctx_header_desc *ctx = &ch_ctx->ctx_header; 6519 struct tsg_gk20a *tsg;
6520 struct nvgpu_gr_ctx *gr_ctx;
6521 struct ctx_header_desc *ctx = &ch->ctx_header;
6535 struct nvgpu_mem *ctxheader = &ctx->mem; 6522 struct nvgpu_mem *ctxheader = &ctx->mem;
6536 6523
6524 tsg = tsg_gk20a_from_ch(ch);
6525 if (!tsg)
6526 return -EINVAL;
6527
6528 gr_ctx = &tsg->gr_ctx;
6537 g->ops.gr.init_ovr_sm_dsm_perf(); 6529 g->ops.gr.init_ovr_sm_dsm_perf();
6538 g->ops.gr.init_sm_dsm_reg_info(); 6530 g->ops.gr.init_sm_dsm_reg_info();
6539 g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs); 6531 g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs);
@@ -6556,17 +6548,17 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6556 ctxsw_prog_main_image_patch_count_o()); 6548 ctxsw_prog_main_image_patch_count_o());
6557 6549
6558 if (!tmp) 6550 if (!tmp)
6559 ch_ctx->patch_ctx.data_count = 0; 6551 gr_ctx->patch_ctx.data_count = 0;
6560 6552
6561 gr_gk20a_ctx_patch_write(g, ch_ctx, 6553 gr_gk20a_ctx_patch_write(g, gr_ctx,
6562 addr, data, true); 6554 addr, data, true);
6563 6555
6564 vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); 6556 vaddr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
6565 vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); 6557 vaddr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
6566 6558
6567 nvgpu_mem_wr(g, mem, 6559 nvgpu_mem_wr(g, mem,
6568 ctxsw_prog_main_image_patch_count_o(), 6560 ctxsw_prog_main_image_patch_count_o(),
6569 ch_ctx->patch_ctx.data_count); 6561 gr_ctx->patch_ctx.data_count);
6570 if (ctxheader->gpu_va) { 6562 if (ctxheader->gpu_va) {
6571 /* 6563 /*
6572 * Main context can be gr_ctx or pm_ctx. 6564 * Main context can be gr_ctx or pm_ctx.
@@ -6575,7 +6567,7 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6575 * __gr_gk20a_exec_ctx_ops. Need to take 6567 * __gr_gk20a_exec_ctx_ops. Need to take
6576 * care of cpu access to ctxheader here. 6568 * care of cpu access to ctxheader here.
6577 */ 6569 */
6578 if (nvgpu_mem_begin(g, ctxheader)) 6570 if (nvgpu_mem_begin(g, ctxheader))
6579 return -ENOMEM; 6571 return -ENOMEM;
6580 nvgpu_mem_wr(g, ctxheader, 6572 nvgpu_mem_wr(g, ctxheader,
6581 ctxsw_prog_main_image_patch_adr_lo_o(), 6573 ctxsw_prog_main_image_patch_adr_lo_o(),
@@ -7690,7 +7682,8 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7690 bool ch_is_curr_ctx) 7682 bool ch_is_curr_ctx)
7691{ 7683{
7692 struct gk20a *g = ch->g; 7684 struct gk20a *g = ch->g;
7693 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; 7685 struct tsg_gk20a *tsg;
7686 struct nvgpu_gr_ctx *gr_ctx;
7694 bool gr_ctx_ready = false; 7687 bool gr_ctx_ready = false;
7695 bool pm_ctx_ready = false; 7688 bool pm_ctx_ready = false;
7696 struct nvgpu_mem *current_mem = NULL; 7689 struct nvgpu_mem *current_mem = NULL;
@@ -7707,6 +7700,12 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7707 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d", 7700 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
7708 num_ctx_wr_ops, num_ctx_rd_ops); 7701 num_ctx_wr_ops, num_ctx_rd_ops);
7709 7702
7703 tsg = tsg_gk20a_from_ch(ch);
7704 if (!tsg)
7705 return -EINVAL;
7706
7707 gr_ctx = &tsg->gr_ctx;
7708
7710 if (ch_is_curr_ctx) { 7709 if (ch_is_curr_ctx) {
7711 for (pass = 0; pass < 2; pass++) { 7710 for (pass = 0; pass < 2; pass++) {
7712 ctx_op_nr = 0; 7711 ctx_op_nr = 0;
@@ -7778,7 +7777,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7778 } 7777 }
7779 offset_addrs = offsets + max_offsets; 7778 offset_addrs = offsets + max_offsets;
7780 7779
7781 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false); 7780 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
7782 if (err) 7781 if (err)
7783 goto cleanup; 7782 goto cleanup;
7784 7783
@@ -7812,13 +7811,13 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7812 * gr_gk20a_apply_instmem_overrides, 7811 * gr_gk20a_apply_instmem_overrides,
7813 * recoded in-place instead. 7812 * recoded in-place instead.
7814 */ 7813 */
7815 if (nvgpu_mem_begin(g, &ch_ctx->gr_ctx->mem)) { 7814 if (nvgpu_mem_begin(g, &gr_ctx->mem)) {
7816 err = -ENOMEM; 7815 err = -ENOMEM;
7817 goto cleanup; 7816 goto cleanup;
7818 } 7817 }
7819 gr_ctx_ready = true; 7818 gr_ctx_ready = true;
7820 } 7819 }
7821 current_mem = &ch_ctx->gr_ctx->mem; 7820 current_mem = &gr_ctx->mem;
7822 } else { 7821 } else {
7823 err = gr_gk20a_get_pm_ctx_buffer_offsets(g, 7822 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
7824 ctx_ops[i].offset, 7823 ctx_ops[i].offset,
@@ -7835,19 +7834,19 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7835 } 7834 }
7836 if (!pm_ctx_ready) { 7835 if (!pm_ctx_ready) {
7837 /* Make sure ctx buffer was initialized */ 7836 /* Make sure ctx buffer was initialized */
7838 if (!nvgpu_mem_is_valid(&ch_ctx->pm_ctx.mem)) { 7837 if (!nvgpu_mem_is_valid(&gr_ctx->pm_ctx.mem)) {
7839 nvgpu_err(g, 7838 nvgpu_err(g,
7840 "Invalid ctx buffer"); 7839 "Invalid ctx buffer");
7841 err = -EINVAL; 7840 err = -EINVAL;
7842 goto cleanup; 7841 goto cleanup;
7843 } 7842 }
7844 if (nvgpu_mem_begin(g, &ch_ctx->pm_ctx.mem)) { 7843 if (nvgpu_mem_begin(g, &gr_ctx->pm_ctx.mem)) {
7845 err = -ENOMEM; 7844 err = -ENOMEM;
7846 goto cleanup; 7845 goto cleanup;
7847 } 7846 }
7848 pm_ctx_ready = true; 7847 pm_ctx_ready = true;
7849 } 7848 }
7850 current_mem = &ch_ctx->pm_ctx.mem; 7849 current_mem = &gr_ctx->pm_ctx.mem;
7851 } 7850 }
7852 7851
7853 /* if this is a quad access, setup for special access*/ 7852 /* if this is a quad access, setup for special access*/
@@ -7860,7 +7859,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7860 /* sanity check gr ctxt offsets, 7859 /* sanity check gr ctxt offsets,
7861 * don't write outside, worst case 7860 * don't write outside, worst case
7862 */ 7861 */
7863 if ((current_mem == &ch_ctx->gr_ctx->mem) && 7862 if ((current_mem == &gr_ctx->mem) &&
7864 (offsets[j] >= g->gr.ctx_vars.golden_image_size)) 7863 (offsets[j] >= g->gr.ctx_vars.golden_image_size))
7865 continue; 7864 continue;
7866 if (pass == 0) { /* write pass */ 7865 if (pass == 0) { /* write pass */
@@ -7886,7 +7885,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7886 7885
7887 /* check to see if we need to add a special WAR 7886 /* check to see if we need to add a special WAR
7888 for some of the SMPC perf regs */ 7887 for some of the SMPC perf regs */
7889 gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], 7888 gr_gk20a_ctx_patch_smpc(g, ch, offset_addrs[j],
7890 v, current_mem); 7889 v, current_mem);
7891 7890
7892 } else { /* read pass */ 7891 } else { /* read pass */
@@ -7915,12 +7914,12 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7915 if (offsets) 7914 if (offsets)
7916 nvgpu_kfree(g, offsets); 7915 nvgpu_kfree(g, offsets);
7917 7916
7918 if (ch_ctx->patch_ctx.mem.cpu_va) 7917 if (gr_ctx->patch_ctx.mem.cpu_va)
7919 gr_gk20a_ctx_patch_write_end(g, ch_ctx, gr_ctx_ready); 7918 gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready);
7920 if (gr_ctx_ready) 7919 if (gr_ctx_ready)
7921 nvgpu_mem_end(g, &ch_ctx->gr_ctx->mem); 7920 nvgpu_mem_end(g, &gr_ctx->mem);
7922 if (pm_ctx_ready) 7921 if (pm_ctx_ready)
7923 nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem); 7922 nvgpu_mem_end(g, &gr_ctx->pm_ctx.mem);
7924 7923
7925 return err; 7924 return err;
7926} 7925}
@@ -7962,23 +7961,23 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7962} 7961}
7963 7962
7964void gr_gk20a_commit_global_pagepool(struct gk20a *g, 7963void gr_gk20a_commit_global_pagepool(struct gk20a *g,
7965 struct channel_ctx_gk20a *ch_ctx, 7964 struct nvgpu_gr_ctx *gr_ctx,
7966 u64 addr, u32 size, bool patch) 7965 u64 addr, u32 size, bool patch)
7967{ 7966{
7968 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(), 7967 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(),
7969 gr_scc_pagepool_base_addr_39_8_f(addr), patch); 7968 gr_scc_pagepool_base_addr_39_8_f(addr), patch);
7970 7969
7971 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(), 7970 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(),
7972 gr_scc_pagepool_total_pages_f(size) | 7971 gr_scc_pagepool_total_pages_f(size) |
7973 gr_scc_pagepool_valid_true_f(), patch); 7972 gr_scc_pagepool_valid_true_f(), patch);
7974 7973
7975 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(), 7974 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(),
7976 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); 7975 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
7977 7976
7978 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(), 7977 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(),
7979 gr_gpcs_gcc_pagepool_total_pages_f(size), patch); 7978 gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
7980 7979
7981 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_pagepool_r(), 7980 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_pagepool_r(),
7982 gr_pd_pagepool_total_pages_f(size) | 7981 gr_pd_pagepool_total_pages_f(size) |
7983 gr_pd_pagepool_valid_true_f(), patch); 7982 gr_pd_pagepool_valid_true_f(), patch);
7984} 7983}