path: root/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
author		seshendra Gadagottu <sgadagottu@nvidia.com>	2017-01-24 18:39:29 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-05-10 17:16:06 -0400
commit		4b990224df5ab7bc2fc92a66ba8e5d9bba023ca8 (patch)
tree		de631aa6344f6f1dc5393beb6e31a81d49135224 /drivers/gpu/nvgpu/gv11b/gr_gv11b.c
parent		0c1f5c457410adb0b0f417c0a77521409731986b (diff)
gpu: nvgpu: gv11b: changes related to preemption
Added function pointers to check chip-specific valid gfx and compute
classes. Also added a function pointer to update the ctx header with
preemption buffer pointers. Where nothing changes from gp10b to gv11b,
fall back to the gp10b functions.

Bug 200292090

Change-Id: I69900e32bbcce4576c4c0f7a7119c7dd8e984928
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1293503
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
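[Reader's note] For context on the HAL pattern this commit relies on: gv11b_init_gr() first installs the gp10b implementations via gp10b_init_gr(), then overrides only the hooks that differ on gv11b, so any hook it leaves untouched falls back to gp10b. A minimal sketch of how a caller could dispatch through the two new class-validation hooks; the validate_obj_class() wrapper is hypothetical, while the hook names and class defines come from the diff below:

	/*
	 * Illustrative sketch only, not part of this commit: dispatch
	 * through the per-chip hooks installed in gv11b_init_gr().
	 */
	static int validate_obj_class(struct gk20a *g, u32 class_num)
	{
		/* On gv11b the gfx check accepts VOLTA_A/PASCAL_A/MAXWELL_B,
		 * the compute check the matching *_COMPUTE_* classes. */
		if (g->ops.gr.is_valid_gfx_class(g, class_num) ||
		    g->ops.gr.is_valid_compute_class(g, class_num))
			return 0;

		return -EINVAL;	/* class not supported on this chip */
	}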
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r--	drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 502
 1 file changed, 60 insertions(+), 442 deletions(-)
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 7638baaf..b8993052 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -73,6 +73,41 @@ static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
 	return valid;
 }
 
+static bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
+{
+	bool valid = false;
+
+	switch (class_num) {
+	case VOLTA_A:
+	case PASCAL_A:
+	case MAXWELL_B:
+		valid = true;
+		break;
+
+	default:
+		break;
+	}
+	return valid;
+}
+
+static bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
+{
+	bool valid = false;
+
+	switch (class_num) {
+	case VOLTA_COMPUTE_A:
+	case PASCAL_COMPUTE_A:
+	case MAXWELL_COMPUTE_B:
+		valid = true;
+		break;
+
+	default:
+		break;
+	}
+	return valid;
+}
+
+
 static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 		bool *post_event, struct channel_gk20a *fault_ch,
 		u32 *hww_global_esr)
@@ -113,122 +148,6 @@ static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
 	return 0;
 }
 
-static int gr_gv11b_commit_global_cb_manager(struct gk20a *g,
-		struct channel_gk20a *c, bool patch)
-{
-	struct gr_gk20a *gr = &g->gr;
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
-	u32 attrib_offset_in_chunk = 0;
-	u32 alpha_offset_in_chunk = 0;
-	u32 pd_ab_max_output;
-	u32 gpc_index, ppc_index;
-	u32 temp, temp2;
-	u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate;
-	u32 attrib_size_in_chunk, cb_attrib_cache_size_init;
-
-	gk20a_dbg_fn("");
-
-	if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) {
-		attrib_size_in_chunk = gr->attrib_cb_default_size +
-			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
-			 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
-		cb_attrib_cache_size_init = gr->attrib_cb_default_size +
-			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
-			 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
-	} else {
-		attrib_size_in_chunk = gr->attrib_cb_size;
-		cb_attrib_cache_size_init = gr->attrib_cb_default_size;
-	}
-
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(),
-		gr->attrib_cb_default_size, patch);
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_alpha_r(),
-		gr->alpha_cb_default_size, patch);
-
-	pd_ab_max_output = (gr->alpha_cb_default_size *
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
-		gr_pd_ab_dist_cfg1_max_output_granularity_v();
-
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
-		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
-		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
-
-	attrib_offset_in_chunk = alpha_offset_in_chunk +
-		gr->tpc_count * gr->alpha_cb_size;
-
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		temp = proj_gpc_stride_v() * gpc_index;
-		temp2 = proj_scal_litter_num_pes_per_gpc_v() * gpc_index;
-		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
-				ppc_index++) {
-			cbm_cfg_size_beta = cb_attrib_cache_size_init *
-				gr->pes_tpc_count[ppc_index][gpc_index];
-			cbm_cfg_size_alpha = gr->alpha_cb_default_size *
-				gr->pes_tpc_count[ppc_index][gpc_index];
-			cbm_cfg_size_steadystate = gr->attrib_cb_default_size *
-				gr->pes_tpc_count[ppc_index][gpc_index];
-
-			gr_gk20a_ctx_patch_write(g, ch_ctx,
-				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
-				cbm_cfg_size_beta, patch);
-
-			gr_gk20a_ctx_patch_write(g, ch_ctx,
-				gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
-				attrib_offset_in_chunk, patch);
-
-			gr_gk20a_ctx_patch_write(g, ch_ctx,
-				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
-				cbm_cfg_size_steadystate,
-				patch);
-
-			attrib_offset_in_chunk += attrib_size_in_chunk *
-				gr->pes_tpc_count[ppc_index][gpc_index];
-
-			gr_gk20a_ctx_patch_write(g, ch_ctx,
-				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
-				cbm_cfg_size_alpha, patch);
-
-			gr_gk20a_ctx_patch_write(g, ch_ctx,
-				gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
-				alpha_offset_in_chunk, patch);
-
-			alpha_offset_in_chunk += gr->alpha_cb_size *
-				gr->pes_tpc_count[ppc_index][gpc_index];
-
-			gr_gk20a_ctx_patch_write(g, ch_ctx,
-				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
-				gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate),
-				patch);
-		}
-	}
-
-	return 0;
-}
-
-static void gr_gv11b_commit_global_pagepool(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
-		u64 addr, u32 size, bool patch)
-{
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(),
-		gr_scc_pagepool_base_addr_39_8_f(addr), patch);
-
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(),
-		gr_scc_pagepool_total_pages_f(size) |
-		gr_scc_pagepool_valid_true_f(), patch);
-
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(),
-		gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
-
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(),
-		gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
-}
-
 static int gr_gv11b_zbc_s_query_table(struct gk20a *g, struct gr_gk20a *gr,
 		struct zbc_query_params *query_params)
 {
@@ -604,42 +523,6 @@ static void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data)
 	}
 }
 
-static int gr_gv11b_init_ctx_state(struct gk20a *g)
-{
-	struct fecs_method_op_gk20a op = {
-		.mailbox = { .id = 0, .data = 0,
-			     .clr = ~0, .ok = 0, .fail = 0},
-		.method.data = 0,
-		.cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
-		.cond.fail = GR_IS_UCODE_OP_SKIP,
-	};
-	int err;
-
-	gk20a_dbg_fn("");
-
-	err = gr_gk20a_init_ctx_state(g);
-	if (err)
-		return err;
-
-	if (!g->gr.t18x.ctx_vars.preempt_image_size) {
-		op.method.addr =
-			gr_fecs_method_push_adr_discover_preemption_image_size_v();
-		op.mailbox.ret = &g->gr.t18x.ctx_vars.preempt_image_size;
-		err = gr_gk20a_submit_fecs_method_op(g, op, false);
-		if (err) {
-			nvgpu_err(g, "query preempt image size failed");
-			return err;
-		}
-	}
-
-	gk20a_dbg_info("preempt image size: %u",
-		g->gr.t18x.ctx_vars.preempt_image_size);
-
-	gk20a_dbg_fn("done");
-
-	return 0;
-}
-
 int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size,
 		struct nvgpu_mem *mem)
 {
@@ -671,253 +554,6 @@ fail_free:
 	return err;
 }
 
-static int gr_gv11b_alloc_gr_ctx(struct gk20a *g,
-		struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
-		u32 class,
-		u32 flags)
-{
-	int err;
-
-	gk20a_dbg_fn("");
-
-	err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags);
-	if (err)
-		return err;
-
-	(*gr_ctx)->t18x.ctx_id_valid = false;
-
-	if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
-		flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;
-
-	if (class == PASCAL_COMPUTE_A &&
-			g->gr.t18x.ctx_vars.force_preemption_cilp)
-		flags |= NVGPU_ALLOC_OBJ_FLAGS_CILP;
-
-	if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) {
-		u32 spill_size =
-			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
-			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
-		u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
-			gr_scc_pagepool_total_pages_byte_granularity_v();
-		u32 betacb_size = g->gr.attrib_cb_default_size +
-			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
-			 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
-		u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
-			gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
-			g->gr.max_tpc_count;
-		attrib_cb_size = ALIGN(attrib_cb_size, 128);
-
-		gk20a_dbg_info("gfxp context spill_size=%d", spill_size);
-		gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size);
-		gk20a_dbg_info("gfxp context attrib_cb_size=%d",
-				attrib_cb_size);
-		err = gr_gv11b_alloc_buffer(vm,
-				g->gr.t18x.ctx_vars.preempt_image_size,
-				&(*gr_ctx)->t18x.preempt_ctxsw_buffer);
-		if (err) {
-			nvgpu_err(vm->mm->g, "cannot allocate preempt buffer");
-			goto fail_free_gk20a_ctx;
-		}
-
-		err = gr_gv11b_alloc_buffer(vm,
-				spill_size,
-				&(*gr_ctx)->t18x.spill_ctxsw_buffer);
-		if (err) {
-			nvgpu_err(vm->mm->g, "cannot allocate spill buffer");
-			goto fail_free_preempt;
-		}
-
-		err = gr_gv11b_alloc_buffer(vm,
-				attrib_cb_size,
-				&(*gr_ctx)->t18x.betacb_ctxsw_buffer);
-		if (err) {
-			nvgpu_err(vm->mm->g, "cannot allocate beta buffer");
-			goto fail_free_spill;
-		}
-
-		err = gr_gv11b_alloc_buffer(vm,
-				pagepool_size,
-				&(*gr_ctx)->t18x.pagepool_ctxsw_buffer);
-		if (err) {
-			nvgpu_err(vm->mm->g, "cannot allocate page pool");
-			goto fail_free_betacb;
-		}
-
-		(*gr_ctx)->graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
-	}
-
-	if (class == PASCAL_COMPUTE_A) {
-		if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP)
-			(*gr_ctx)->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
-		else
-			(*gr_ctx)->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CTA;
-	}
-
-	gk20a_dbg_fn("done");
-
-	return err;
-
-fail_free_betacb:
-	nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.betacb_ctxsw_buffer);
-fail_free_spill:
-	nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.spill_ctxsw_buffer);
-fail_free_preempt:
-	nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.preempt_ctxsw_buffer);
-fail_free_gk20a_ctx:
-	gr_gk20a_free_gr_ctx(g, vm, *gr_ctx);
-	*gr_ctx = NULL;
-
-	return err;
-}
-
-static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
-		struct gr_ctx_desc *gr_ctx)
-{
-	struct nvgpu_mem *mem = &gr_ctx->mem;
-
-	if (nvgpu_mem_begin(g, mem)) {
-		WARN_ON("Cannot map context");
-		return;
-	}
-	nvgpu_err(g, "ctxsw_prog_main_image_magic_value_o : %x (expect %x)",
-		nvgpu_mem_rd(g, mem,
-			ctxsw_prog_main_image_magic_value_o()),
-		ctxsw_prog_main_image_magic_value_v_value_v());
-
-
-	nvgpu_err(g, "NUM_SAVE_OPERATIONS : %d",
-		nvgpu_mem_rd(g, mem,
-			ctxsw_prog_main_image_num_save_ops_o()));
-	nvgpu_err(g, "WFI_SAVE_OPERATIONS : %d",
-		nvgpu_mem_rd(g, mem,
-			ctxsw_prog_main_image_num_wfi_save_ops_o()));
-	nvgpu_err(g, "CTA_SAVE_OPERATIONS : %d",
-		nvgpu_mem_rd(g, mem,
-			ctxsw_prog_main_image_num_cta_save_ops_o()));
-	nvgpu_err(g, "GFXP_SAVE_OPERATIONS : %d",
-		nvgpu_mem_rd(g, mem,
-			ctxsw_prog_main_image_num_gfxp_save_ops_o()));
-	nvgpu_err(g, "CILP_SAVE_OPERATIONS : %d",
-		nvgpu_mem_rd(g, mem,
-			ctxsw_prog_main_image_num_cilp_save_ops_o()));
-	nvgpu_err(g, "image gfx preemption option (GFXP is 1) %x",
-		nvgpu_mem_rd(g, mem,
-			ctxsw_prog_main_image_graphics_preemption_options_o()));
-	nvgpu_mem_end(g, mem);
-}
-
-static void gr_gv11b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
-		struct gr_ctx_desc *gr_ctx)
-{
-	gk20a_dbg_fn("");
-
-	if (!gr_ctx)
-		return;
-
-	if (g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close)
-		dump_ctx_switch_stats(g, vm, gr_ctx);
-
-	nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
-	nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
-	nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
-	nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
-	gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
-	gk20a_dbg_fn("done");
-}
-
-
-static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
-		struct nvgpu_mem *mem)
-{
-	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
-	u32 gfxp_preempt_option =
-		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
-	u32 cilp_preempt_option =
-		ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
-	int err;
-
-	gk20a_dbg_fn("");
-
-	if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) {
-		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
-		nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_graphics_preemption_options_o(),
-				gfxp_preempt_option);
-	}
-
-	if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) {
-		gk20a_dbg_info("CILP: %x", cilp_preempt_option);
-		nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_compute_preemption_options_o(),
-				cilp_preempt_option);
-	}
-
-	if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) {
-		u32 addr;
-		u32 size;
-		u32 cbes_reserve;
-
-		nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_full_preemption_ptr_o(),
-			gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
-
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
-		if (err) {
-			nvgpu_err(g, "can't map patch context");
-			goto out;
-		}
-
-		addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >>
-			gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
-			(u64_hi32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) <<
-			 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
-
-		gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
-		g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
-
-		addr = (u64_lo32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) >>
-			gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
-			(u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
-			 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
-		size = gr_ctx->t18x.pagepool_ctxsw_buffer.size;
-
-		if (size == g->ops.gr.pagepool_default_size(g))
-			size = gr_scc_pagepool_total_pages_hwmax_v();
-
-		g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
-
-		addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >>
-			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
-			(u64_hi32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) <<
-			 (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
-		size = gr_ctx->t18x.spill_ctxsw_buffer.size /
-			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
-
-		gr_gk20a_ctx_patch_write(g, ch_ctx,
-				gr_gpc0_swdx_rm_spill_buffer_addr_r(),
-				gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
-				true);
-		gr_gk20a_ctx_patch_write(g, ch_ctx,
-				gr_gpc0_swdx_rm_spill_buffer_size_r(),
-				gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
-				true);
-
-		cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
-		gr_gk20a_ctx_patch_write(g, ch_ctx,
-				gr_gpcs_swdx_beta_cb_ctrl_r(),
-				gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
-					cbes_reserve),
-				true);
-		gr_gk20a_ctx_patch_write(g, ch_ctx,
-				gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
-				gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
-					cbes_reserve),
-				true);
-
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
-	}
-
-out:
-	gk20a_dbg_fn("done");
-}
 
 static int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
 		struct gk20a_debug_output *o)
@@ -1145,41 +781,6 @@ static void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
 		gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
 }
 
-static void gr_gv11b_commit_global_bundle_cb(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
-		u64 addr, u64 size, bool patch)
-{
-	u32 data;
-
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
-		gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
-
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
-		gr_scc_bundle_cb_size_div_256b_f(size) |
-		gr_scc_bundle_cb_size_valid_true_f(), patch);
-
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
-		gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);
-
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
-		gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
-		gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
-
-	/* data for state_limit */
-	data = (g->gr.bundle_cb_default_size *
-		gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
-		gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
-
-	data = min_t(u32, data, g->gr.min_gpm_fifo_depth);
-
-	gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
-		g->gr.bundle_cb_token_limit, data);
-
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
-		gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
-		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
-}
-
 static int gr_gv11b_init_fs_state(struct gk20a *g)
 {
 	return gr_gp10b_init_fs_state(g);
@@ -2025,6 +1626,28 @@ static void gr_gv11b_load_tpc_mask(struct gk20a *g)
 
 }
 
+static void gr_gv11b_write_preemption_ptr(struct gk20a *g,
+		struct nvgpu_mem *mem, u64 gpu_va)
+{
+	u32 addr_lo, addr_hi;
+
+	addr_lo = u64_lo32(gpu_va);
+	addr_hi = u64_hi32(gpu_va);
+
+	nvgpu_mem_wr(g, mem,
+		ctxsw_prog_main_image_full_preemption_ptr_o(), addr_lo);
+	nvgpu_mem_wr(g, mem,
+		ctxsw_prog_main_image_full_preemption_ptr_hi_o(), addr_hi);
+
+	nvgpu_mem_wr(g, mem,
+		ctxsw_prog_main_image_full_preemption_ptr_veid0_o(), addr_lo);
+	nvgpu_mem_wr(g, mem,
+		ctxsw_prog_main_image_full_preemption_ptr_veid0_hi_o(),
+		addr_hi);
+
+}
+
+
 void gv11b_init_gr(struct gpu_ops *gops)
 {
 	gp10b_init_gr(gops);
@@ -2032,8 +1655,9 @@ void gv11b_init_gr(struct gpu_ops *gops)
 	gops->gr.init_fs_state = gr_gv11b_init_fs_state;
 	gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch;
 	gops->gr.is_valid_class = gr_gv11b_is_valid_class;
-	gops->gr.commit_global_cb_manager = gr_gv11b_commit_global_cb_manager;
-	gops->gr.commit_global_pagepool = gr_gv11b_commit_global_pagepool;
+	gops->gr.is_valid_gfx_class = gr_gv11b_is_valid_gfx_class;
+	gops->gr.is_valid_compute_class = gr_gv11b_is_valid_compute_class;
+	gops->gr.write_preemption_ptr = gr_gv11b_write_preemption_ptr;
 	gops->gr.add_zbc_s = gr_gv11b_add_zbc_stencil;
 	gops->gr.load_zbc_s_default_tbl = gr_gv11b_load_stencil_default_tbl;
 	gops->gr.load_zbc_s_tbl = gr_gv11b_load_stencil_tbl;
@@ -2043,7 +1667,6 @@ void gv11b_init_gr(struct gpu_ops *gops)
 	gops->gr.calc_global_ctx_buffer_size =
 		gr_gv11b_calc_global_ctx_buffer_size;
 	gops->gr.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb;
-	gops->gr.commit_global_bundle_cb = gr_gv11b_commit_global_bundle_cb;
 	gops->gr.handle_sw_method = gr_gv11b_handle_sw_method;
 	gops->gr.bundle_cb_defaults = gr_gv11b_bundle_cb_defaults;
 	gops->gr.cb_size_default = gr_gv11b_cb_size_default;
@@ -2051,11 +1674,6 @@ void gv11b_init_gr(struct gpu_ops *gops)
 		gr_gv11b_set_alpha_circular_buffer_size;
 	gops->gr.set_circular_buffer_size =
 		gr_gv11b_set_circular_buffer_size;
-	gops->gr.init_ctx_state = gr_gv11b_init_ctx_state;
-	gops->gr.alloc_gr_ctx = gr_gv11b_alloc_gr_ctx;
-	gops->gr.free_gr_ctx = gr_gv11b_free_gr_ctx;
-	gops->gr.update_ctxsw_preemption_mode =
-		gr_gv11b_update_ctxsw_preemption_mode;
 	gops->gr.dump_gr_regs = gr_gv11b_dump_gr_status_regs;
 	gops->gr.wait_empty = gr_gv11b_wait_empty;
 	gops->gr.init_cyclestats = gr_gv11b_init_cyclestats;
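[Reader's note] On the new write_preemption_ptr hook: the removed gr_gv11b_update_ctxsw_preemption_mode() above wrote a single shifted pointer word (gpu_va >> 8) into ctxsw_prog_main_image_full_preemption_ptr_o(), while the gv11b hook writes separate lo/hi words and also mirrors them into the VEID0 slots of the context header. A hedged usage sketch; the commit_preempt_ptr() wrapper is hypothetical, while the hook, struct fields, and the preempt_ctxsw_buffer come from the diff above:

	/*
	 * Illustrative only: commit the GfxP preemption save buffer
	 * pointer into a context image through the per-chip hook.
	 */
	static void commit_preempt_ptr(struct gk20a *g,
			struct gr_ctx_desc *gr_ctx, struct nvgpu_mem *ctx_mem)
	{
		/* GPU VA of the buffer allocated at gr ctx creation
		 * time (t18x.preempt_ctxsw_buffer in the diff). */
		u64 gpu_va = gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va;

		if (g->ops.gr.write_preemption_ptr)
			/* gv11b: fills ptr, ptr_hi and both VEID0 copies. */
			g->ops.gr.write_preemption_ptr(g, ctx_mem, gpu_va);
	}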