summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/nvgpu/gv11b/gr_gv11b.c 502
1 files changed, 60 insertions, 442 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 7638baaf..b8993052 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -73,6 +73,41 @@ static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
73 return valid; 73 return valid;
74} 74}
75 75
/*
 * Tell whether class_num names one of the graphics classes accepted here.
 * Returns true for VOLTA_A, PASCAL_A and MAXWELL_B, false for anything else.
 * The g argument is not read by this function.
 * New-side-only lines in this diff: the change adds this function.
 */
76static bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
77{
78 bool valid = false;
79
80 switch (class_num) {
81 case VOLTA_A:
82 case PASCAL_A:
83 case MAXWELL_B:
84 valid = true;
85 break;
86
87 default:
88 break;
89 }
90 return valid;
91}
92
/*
 * Tell whether class_num names one of the compute classes accepted here.
 * Returns true for VOLTA_COMPUTE_A, PASCAL_COMPUTE_A and MAXWELL_COMPUTE_B,
 * false for anything else. The g argument is not read by this function.
 * New-side-only lines in this diff: the change adds this function.
 */
93static bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
94{
95 bool valid = false;
96
97 switch (class_num) {
98 case VOLTA_COMPUTE_A:
99 case PASCAL_COMPUTE_A:
100 case MAXWELL_COMPUTE_B:
101 valid = true;
102 break;
103
104 default:
105 break;
106 }
107 return valid;
108}
109
110
76static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, 111static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
77 bool *post_event, struct channel_gk20a *fault_ch, 112 bool *post_event, struct channel_gk20a *fault_ch,
78 u32 *hww_global_esr) 113 u32 *hww_global_esr)
@@ -113,122 +148,6 @@ static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
113 return 0; 148 return 0;
114} 149}
115 150
/*
 * Write the alpha/beta circular buffer sizes and offsets for channel c
 * through gr_gk20a_ctx_patch_write(), forwarding the caller's patch flag
 * on every write.
 *
 * Steps performed:
 *  - choose the beta chunk size and the initial beta cache size depending on
 *    whether the context's graphics preempt mode is GFXP;
 *  - write the default beta and alpha sizes to the ds_tga constraint logic
 *    registers and the derived max-output value to pd_ab_dist_cfg1;
 *  - for every PPC of every GPC, write the beta size/offset, the beta steady
 *    state size, the alpha size/offset and the swdx tc beta size, advancing
 *    the running offsets by (chunk size * TPC count of that PPC).
 *
 * Always returns 0.
 * Old-side-only lines in this diff: the change removes this function.
 */
116static int gr_gv11b_commit_global_cb_manager(struct gk20a *g,
117 struct channel_gk20a *c, bool patch)
118{
119 struct gr_gk20a *gr = &g->gr;
120 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
121 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
122 u32 attrib_offset_in_chunk = 0;
123 u32 alpha_offset_in_chunk = 0;
124 u32 pd_ab_max_output;
125 u32 gpc_index, ppc_index;
126 u32 temp, temp2;
127 u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate;
128 u32 attrib_size_in_chunk, cb_attrib_cache_size_init;
129
130 gk20a_dbg_fn("");
131
/* In GFXP mode both sizes are the default size grown by (gfxp - default). */
132 if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) {
133 attrib_size_in_chunk = gr->attrib_cb_default_size +
134 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
135 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
136 cb_attrib_cache_size_init = gr->attrib_cb_default_size +
137 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
138 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
139 } else {
140 attrib_size_in_chunk = gr->attrib_cb_size;
141 cb_attrib_cache_size_init = gr->attrib_cb_default_size;
142 }
143
144 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(),
145 gr->attrib_cb_default_size, patch);
146 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_alpha_r(),
147 gr->alpha_cb_default_size, patch);
148
149 pd_ab_max_output = (gr->alpha_cb_default_size *
150 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
151 gr_pd_ab_dist_cfg1_max_output_granularity_v();
152
153 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
154 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
155 gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
156
/* Beta offsets start after tpc_count alpha chunks (alpha offset is 0 here). */
157 attrib_offset_in_chunk = alpha_offset_in_chunk +
158 gr->tpc_count * gr->alpha_cb_size;
159
160 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
/* temp: register offset of this GPC; temp2: index of its first PES entry. */
161 temp = proj_gpc_stride_v() * gpc_index;
162 temp2 = proj_scal_litter_num_pes_per_gpc_v() * gpc_index;
163 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
164 ppc_index++) {
165 cbm_cfg_size_beta = cb_attrib_cache_size_init *
166 gr->pes_tpc_count[ppc_index][gpc_index];
167 cbm_cfg_size_alpha = gr->alpha_cb_default_size *
168 gr->pes_tpc_count[ppc_index][gpc_index];
169 cbm_cfg_size_steadystate = gr->attrib_cb_default_size *
170 gr->pes_tpc_count[ppc_index][gpc_index];
171
172 gr_gk20a_ctx_patch_write(g, ch_ctx,
173 gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
174 proj_ppc_in_gpc_stride_v() * ppc_index,
175 cbm_cfg_size_beta, patch);
176
177 gr_gk20a_ctx_patch_write(g, ch_ctx,
178 gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
179 proj_ppc_in_gpc_stride_v() * ppc_index,
180 attrib_offset_in_chunk, patch);
181
182 gr_gk20a_ctx_patch_write(g, ch_ctx,
183 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
184 proj_ppc_in_gpc_stride_v() * ppc_index,
185 cbm_cfg_size_steadystate,
186 patch);
187
/* Advance the beta offset only after it has been written for this PPC. */
188 attrib_offset_in_chunk += attrib_size_in_chunk *
189 gr->pes_tpc_count[ppc_index][gpc_index];
190
191 gr_gk20a_ctx_patch_write(g, ch_ctx,
192 gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
193 proj_ppc_in_gpc_stride_v() * ppc_index,
194 cbm_cfg_size_alpha, patch);
195
196 gr_gk20a_ctx_patch_write(g, ch_ctx,
197 gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
198 proj_ppc_in_gpc_stride_v() * ppc_index,
199 alpha_offset_in_chunk, patch);
200
201 alpha_offset_in_chunk += gr->alpha_cb_size *
202 gr->pes_tpc_count[ppc_index][gpc_index];
203
204 gr_gk20a_ctx_patch_write(g, ch_ctx,
205 gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
206 gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate),
207 patch);
208 }
209 }
210
211 return 0;
212}
213
/*
 * Write the page pool base address and size for ch_ctx through
 * gr_gk20a_ctx_patch_write(), forwarding the caller's patch flag.
 *
 * addr goes to both the scc and the gpcs_gcc pagepool base registers; size
 * goes to both pagepool registers. Only the scc write also ORs in the
 * valid_true field.
 * Old-side-only lines in this diff: the change removes this function.
 */
214static void gr_gv11b_commit_global_pagepool(struct gk20a *g,
215 struct channel_ctx_gk20a *ch_ctx,
216 u64 addr, u32 size, bool patch)
217{
218 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(),
219 gr_scc_pagepool_base_addr_39_8_f(addr), patch);
220
221 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(),
222 gr_scc_pagepool_total_pages_f(size) |
223 gr_scc_pagepool_valid_true_f(), patch);
224
225 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(),
226 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
227
228 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(),
229 gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
230}
231
232static int gr_gv11b_zbc_s_query_table(struct gk20a *g, struct gr_gk20a *gr, 151static int gr_gv11b_zbc_s_query_table(struct gk20a *g, struct gr_gk20a *gr,
233 struct zbc_query_params *query_params) 152 struct zbc_query_params *query_params)
234{ 153{
@@ -604,42 +523,6 @@ static void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data)
604 } 523 }
605} 524}
606 525
/*
 * Run gr_gk20a_init_ctx_state() and then, if the cached preempt image size
 * is still zero, submit a FECS method op that discovers it.
 *
 * Returns the error from gr_gk20a_init_ctx_state() or from the FECS method
 * submission if either fails, otherwise 0.
 * Old-side-only lines in this diff: the change removes this function.
 */
607static int gr_gv11b_init_ctx_state(struct gk20a *g)
608{
609 struct fecs_method_op_gk20a op = {
610 .mailbox = { .id = 0, .data = 0,
611 .clr = ~0, .ok = 0, .fail = 0},
612 .method.data = 0,
613 .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
614 .cond.fail = GR_IS_UCODE_OP_SKIP,
615 };
616 int err;
617
618 gk20a_dbg_fn("");
619
620 err = gr_gk20a_init_ctx_state(g);
621 if (err)
622 return err;
623
/* Only query when no size is cached; the result lands in the cached field. */
624 if (!g->gr.t18x.ctx_vars.preempt_image_size) {
625 op.method.addr =
626 gr_fecs_method_push_adr_discover_preemption_image_size_v();
627 op.mailbox.ret = &g->gr.t18x.ctx_vars.preempt_image_size;
628 err = gr_gk20a_submit_fecs_method_op(g, op, false);
629 if (err) {
630 nvgpu_err(g, "query preempt image size failed");
631 return err;
632 }
633 }
634
635 gk20a_dbg_info("preempt image size: %u",
636 g->gr.t18x.ctx_vars.preempt_image_size);
637
638 gk20a_dbg_fn("done");
639
640 return 0;
641}
642
643int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, 526int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size,
644 struct nvgpu_mem *mem) 527 struct nvgpu_mem *mem)
645{ 528{
@@ -671,253 +554,6 @@ fail_free:
671 return err; 554 return err;
672} 555}
673 556
/*
 * Allocate a graphics context via gr_gk20a_alloc_gr_ctx() and, when GFXP is
 * requested, the four extra buffers stored in the context's t18x fields:
 * preempt, spill, betacb and pagepool.
 *
 * flags may be extended locally: GFXP is forced for class PASCAL_A and CILP
 * for class PASCAL_COMPUTE_A when the matching force_preemption setting is
 * set. For PASCAL_COMPUTE_A the compute preempt mode is set to CILP or CTA
 * depending on the CILP flag.
 *
 * Returns 0 on success. On a buffer allocation failure the buffers already
 * allocated are released in reverse order, the base context is freed,
 * *gr_ctx is set to NULL and the error is returned.
 *
 * NOTE(review): only the PASCAL class values are tested here; confirm
 * whether other classes handled by this file were meant to be covered.
 * Old-side-only lines in this diff: the change removes this function.
 */
674static int gr_gv11b_alloc_gr_ctx(struct gk20a *g,
675 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
676 u32 class,
677 u32 flags)
678{
679 int err;
680
681 gk20a_dbg_fn("");
682
683 err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags);
684 if (err)
685 return err;
686
687 (*gr_ctx)->t18x.ctx_id_valid = false;
688
689 if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
690 flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;
691
692 if (class == PASCAL_COMPUTE_A &&
693 g->gr.t18x.ctx_vars.force_preemption_cilp)
694 flags |= NVGPU_ALLOC_OBJ_FLAGS_CILP;
695
696 if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) {
697 u32 spill_size =
698 gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
699 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
700 u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
701 gr_scc_pagepool_total_pages_byte_granularity_v();
702 u32 betacb_size = g->gr.attrib_cb_default_size +
703 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
704 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
705 u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
706 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
707 g->gr.max_tpc_count;
708 attrib_cb_size = ALIGN(attrib_cb_size, 128);
709
710 gk20a_dbg_info("gfxp context spill_size=%d", spill_size);
711 gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size);
712 gk20a_dbg_info("gfxp context attrib_cb_size=%d",
713 attrib_cb_size);
/* Each failure below jumps to the label that frees what was allocated so far. */
714 err = gr_gv11b_alloc_buffer(vm,
715 g->gr.t18x.ctx_vars.preempt_image_size,
716 &(*gr_ctx)->t18x.preempt_ctxsw_buffer);
717 if (err) {
718 nvgpu_err(vm->mm->g, "cannot allocate preempt buffer");
719 goto fail_free_gk20a_ctx;
720 }
721
722 err = gr_gv11b_alloc_buffer(vm,
723 spill_size,
724 &(*gr_ctx)->t18x.spill_ctxsw_buffer);
725 if (err) {
726 nvgpu_err(vm->mm->g, "cannot allocate spill buffer");
727 goto fail_free_preempt;
728 }
729
730 err = gr_gv11b_alloc_buffer(vm,
731 attrib_cb_size,
732 &(*gr_ctx)->t18x.betacb_ctxsw_buffer);
733 if (err) {
734 nvgpu_err(vm->mm->g, "cannot allocate beta buffer");
735 goto fail_free_spill;
736 }
737
738 err = gr_gv11b_alloc_buffer(vm,
739 pagepool_size,
740 &(*gr_ctx)->t18x.pagepool_ctxsw_buffer);
741 if (err) {
742 nvgpu_err(vm->mm->g, "cannot allocate page pool");
743 goto fail_free_betacb;
744 }
745
746 (*gr_ctx)->graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
747 }
748
749 if (class == PASCAL_COMPUTE_A) {
750 if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP)
751 (*gr_ctx)->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
752 else
753 (*gr_ctx)->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CTA;
754 }
755
756 gk20a_dbg_fn("done");
757
/* err is 0 here: every failing call above has already branched away. */
758 return err;
759
/* Unwind labels fall through, releasing buffers in reverse allocation order. */
760fail_free_betacb:
761 nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.betacb_ctxsw_buffer);
762fail_free_spill:
763 nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.spill_ctxsw_buffer);
764fail_free_preempt:
765 nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.preempt_ctxsw_buffer);
766fail_free_gk20a_ctx:
767 gr_gk20a_free_gr_ctx(g, vm, *gr_ctx);
768 *gr_ctx = NULL;
769
770 return err;
771}
772
/*
 * Log a set of words read from the context image in gr_ctx->mem through
 * nvgpu_err(): the magic value (with the expected value), the save
 * operation counters (total, WFI, CTA, GFXP, CILP) and the graphics
 * preemption option.
 *
 * The memory is accessed between nvgpu_mem_begin() and nvgpu_mem_end(); if
 * nvgpu_mem_begin() returns non-zero the function warns and returns without
 * logging. The vm argument is not used.
 *
 * NOTE(review): WARN_ON() is given a string literal, which is a non-null
 * pointer, so the condition is always true; presumably an unconditional
 * warning carrying that message was intended. Confirm.
 * Old-side-only lines in this diff: the change removes this function.
 */
773static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
774 struct gr_ctx_desc *gr_ctx)
775{
776 struct nvgpu_mem *mem = &gr_ctx->mem;
777
778 if (nvgpu_mem_begin(g, mem)) {
779 WARN_ON("Cannot map context");
780 return;
781 }
782 nvgpu_err(g, "ctxsw_prog_main_image_magic_value_o : %x (expect %x)",
783 nvgpu_mem_rd(g, mem,
784 ctxsw_prog_main_image_magic_value_o()),
785 ctxsw_prog_main_image_magic_value_v_value_v());
786
787
788 nvgpu_err(g, "NUM_SAVE_OPERATIONS : %d",
789 nvgpu_mem_rd(g, mem,
790 ctxsw_prog_main_image_num_save_ops_o()));
791 nvgpu_err(g, "WFI_SAVE_OPERATIONS : %d",
792 nvgpu_mem_rd(g, mem,
793 ctxsw_prog_main_image_num_wfi_save_ops_o()));
794 nvgpu_err(g, "CTA_SAVE_OPERATIONS : %d",
795 nvgpu_mem_rd(g, mem,
796 ctxsw_prog_main_image_num_cta_save_ops_o()));
797 nvgpu_err(g, "GFXP_SAVE_OPERATIONS : %d",
798 nvgpu_mem_rd(g, mem,
799 ctxsw_prog_main_image_num_gfxp_save_ops_o()));
800 nvgpu_err(g, "CILP_SAVE_OPERATIONS : %d",
801 nvgpu_mem_rd(g, mem,
802 ctxsw_prog_main_image_num_cilp_save_ops_o()));
803 nvgpu_err(g, "image gfx preemption option (GFXP is 1) %x",
804 nvgpu_mem_rd(g, mem,
805 ctxsw_prog_main_image_graphics_preemption_options_o()));
806 nvgpu_mem_end(g, mem);
807}
808
/*
 * Release a graphics context: unmap and free the pagepool, betacb, spill
 * and preempt buffers held in gr_ctx->t18x, then hand the context to
 * gr_gk20a_free_gr_ctx().
 *
 * Does nothing when gr_ctx is NULL. When the
 * dump_ctxsw_stats_on_channel_close setting is set, the context switch
 * statistics are dumped before anything is freed.
 * Old-side-only lines in this diff: the change removes this function.
 */
809static void gr_gv11b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
810 struct gr_ctx_desc *gr_ctx)
811{
812 gk20a_dbg_fn("");
813
814 if (!gr_ctx)
815 return;
816
817 if (g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close)
818 dump_ctx_switch_stats(g, vm, gr_ctx);
819
820 nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
821 nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
822 nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
823 nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
824 gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
825 gk20a_dbg_fn("done");
826}
827
828
/*
 * Apply the context's preemption settings to the context image in mem and
 * to the patch context of ch_ctx.
 *
 *  - If the graphics preempt mode is GFXP, write the GFXP option word.
 *  - If the compute preempt mode is CILP, write the CILP option word.
 *  - If the preempt buffer has a non-zero GPU VA, write that VA shifted
 *    right by 8 as the full preemption pointer, then, inside a patch
 *    begin/end pair, commit the betacb, pagepool and spill buffer addresses
 *    and sizes and write the GFXP cbes_reserve value to both beta_cb_ctrl
 *    registers.
 *
 * Returns nothing; if the patch context cannot be opened the error is
 * logged and the remaining patch writes are skipped.
 * Old-side-only lines in this diff: the change removes this function.
 */
829static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
830 struct channel_ctx_gk20a *ch_ctx,
831 struct nvgpu_mem *mem)
832{
833 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
834 u32 gfxp_preempt_option =
835 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
836 u32 cilp_preempt_option =
837 ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
838 int err;
839
840 gk20a_dbg_fn("");
841
842 if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) {
843 gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
844 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_graphics_preemption_options_o(),
845 gfxp_preempt_option);
846 }
847
848 if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) {
849 gk20a_dbg_info("CILP: %x", cilp_preempt_option);
850 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_compute_preemption_options_o(),
851 cilp_preempt_option);
852 }
853
854 if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) {
855 u32 addr;
856 u32 size;
857 u32 cbes_reserve;
858
859 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_full_preemption_ptr_o(),
860 gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
861
862 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
863 if (err) {
864 nvgpu_err(g, "can't map patch context");
865 goto out;
866 }
867
/*
 * Each addr below packs a 64-bit GPU VA, shifted right by the register's
 * align bits, into 32 bits: low word shifted down, high word shifted up.
 */
868 addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >>
869 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
870 (u64_hi32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) <<
871 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
872
873 gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
874 g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
875
876 addr = (u64_lo32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) >>
877 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
878 (u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
879 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
880 size = gr_ctx->t18x.pagepool_ctxsw_buffer.size;
881
/* A size equal to the default page pool size is replaced by the hwmax value. */
882 if (size == g->ops.gr.pagepool_default_size(g))
883 size = gr_scc_pagepool_total_pages_hwmax_v();
884
885 g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
886
887 addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >>
888 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
889 (u64_hi32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) <<
890 (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
/* The spill size is written in units of the 256b byte granularity. */
891 size = gr_ctx->t18x.spill_ctxsw_buffer.size /
892 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
893
894 gr_gk20a_ctx_patch_write(g, ch_ctx,
895 gr_gpc0_swdx_rm_spill_buffer_addr_r(),
896 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
897 true);
898 gr_gk20a_ctx_patch_write(g, ch_ctx,
899 gr_gpc0_swdx_rm_spill_buffer_size_r(),
900 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
901 true);
902
903 cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
904 gr_gk20a_ctx_patch_write(g, ch_ctx,
905 gr_gpcs_swdx_beta_cb_ctrl_r(),
906 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
907 cbes_reserve),
908 true);
909 gr_gk20a_ctx_patch_write(g, ch_ctx,
910 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
911 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
912 cbes_reserve),
913 true);
914
915 gr_gk20a_ctx_patch_write_end(g, ch_ctx);
916 }
917
918out:
919 gk20a_dbg_fn("done");
920}
921 557
922static int gr_gv11b_dump_gr_status_regs(struct gk20a *g, 558static int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
923 struct gk20a_debug_output *o) 559 struct gk20a_debug_output *o)
@@ -1145,41 +781,6 @@ static void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
1145 gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); 781 gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
1146} 782}
1147 783
/*
 * Write the bundle circular buffer base address and size for ch_ctx through
 * gr_gk20a_ctx_patch_write(), forwarding the caller's patch flag.
 *
 * addr and size (each with a valid_true field on the size write) go to both
 * the scc and the gpcs_swdx bundle_cb registers. A state limit is then
 * derived from the default bundle size, capped at min_gpm_fifo_depth, and
 * written together with the token limit to pd_ab_dist_cfg2.
 * Old-side-only lines in this diff: the change removes this function.
 */
1148static void gr_gv11b_commit_global_bundle_cb(struct gk20a *g,
1149 struct channel_ctx_gk20a *ch_ctx,
1150 u64 addr, u64 size, bool patch)
1151{
1152 u32 data;
1153
1154 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
1155 gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
1156
1157 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
1158 gr_scc_bundle_cb_size_div_256b_f(size) |
1159 gr_scc_bundle_cb_size_valid_true_f(), patch);
1160
1161 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
1162 gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);
1163
1164 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
1165 gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
1166 gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
1167
1168 /* data for state_limit */
1169 data = (g->gr.bundle_cb_default_size *
1170 gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
1171 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
1172
/* The state limit never exceeds min_gpm_fifo_depth. */
1173 data = min_t(u32, data, g->gr.min_gpm_fifo_depth);
1174
1175 gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
1176 g->gr.bundle_cb_token_limit, data);
1177
1178 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
1179 gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
1180 gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
1181}
1182
1183static int gr_gv11b_init_fs_state(struct gk20a *g) 784static int gr_gv11b_init_fs_state(struct gk20a *g)
1184{ 785{
1185 return gr_gp10b_init_fs_state(g); 786 return gr_gp10b_init_fs_state(g);
@@ -2025,6 +1626,28 @@ static void gr_gv11b_load_tpc_mask(struct gk20a *g)
2025 1626
2026} 1627}
2027 1628
/*
 * Store gpu_va into the context image in mem as two 32-bit halves.
 *
 * The low word is written at the full_preemption_ptr and
 * full_preemption_ptr_veid0 offsets, and the high word at the matching
 * _hi offsets, so both pointer pairs receive the same 64-bit value.
 * New-side-only lines in this diff: the change adds this function.
 */
1629static void gr_gv11b_write_preemption_ptr(struct gk20a *g,
1630 struct nvgpu_mem *mem, u64 gpu_va)
1631{
1632 u32 addr_lo, addr_hi;
1633
1634 addr_lo = u64_lo32(gpu_va);
1635 addr_hi = u64_hi32(gpu_va);
1636
1637 nvgpu_mem_wr(g, mem,
1638 ctxsw_prog_main_image_full_preemption_ptr_o(), addr_lo);
1639 nvgpu_mem_wr(g, mem,
1640 ctxsw_prog_main_image_full_preemption_ptr_hi_o(), addr_hi);
1641
1642 nvgpu_mem_wr(g, mem,
1643 ctxsw_prog_main_image_full_preemption_ptr_veid0_o(), addr_lo);
1644 nvgpu_mem_wr(g, mem,
1645 ctxsw_prog_main_image_full_preemption_ptr_veid0_hi_o(),
1646 addr_hi);
1647
1648}
1649
1650
2028void gv11b_init_gr(struct gpu_ops *gops) 1651void gv11b_init_gr(struct gpu_ops *gops)
2029{ 1652{
2030 gp10b_init_gr(gops); 1653 gp10b_init_gr(gops);
@@ -2032,8 +1655,9 @@ void gv11b_init_gr(struct gpu_ops *gops)
2032 gops->gr.init_fs_state = gr_gv11b_init_fs_state; 1655 gops->gr.init_fs_state = gr_gv11b_init_fs_state;
2033 gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch; 1656 gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch;
2034 gops->gr.is_valid_class = gr_gv11b_is_valid_class; 1657 gops->gr.is_valid_class = gr_gv11b_is_valid_class;
2035 gops->gr.commit_global_cb_manager = gr_gv11b_commit_global_cb_manager; 1658 gops->gr.is_valid_gfx_class = gr_gv11b_is_valid_gfx_class;
2036 gops->gr.commit_global_pagepool = gr_gv11b_commit_global_pagepool; 1659 gops->gr.is_valid_compute_class = gr_gv11b_is_valid_compute_class;
1660 gops->gr.write_preemption_ptr = gr_gv11b_write_preemption_ptr;
2037 gops->gr.add_zbc_s = gr_gv11b_add_zbc_stencil; 1661 gops->gr.add_zbc_s = gr_gv11b_add_zbc_stencil;
2038 gops->gr.load_zbc_s_default_tbl = gr_gv11b_load_stencil_default_tbl; 1662 gops->gr.load_zbc_s_default_tbl = gr_gv11b_load_stencil_default_tbl;
2039 gops->gr.load_zbc_s_tbl = gr_gv11b_load_stencil_tbl; 1663 gops->gr.load_zbc_s_tbl = gr_gv11b_load_stencil_tbl;
@@ -2043,7 +1667,6 @@ void gv11b_init_gr(struct gpu_ops *gops)
2043 gops->gr.calc_global_ctx_buffer_size = 1667 gops->gr.calc_global_ctx_buffer_size =
2044 gr_gv11b_calc_global_ctx_buffer_size; 1668 gr_gv11b_calc_global_ctx_buffer_size;
2045 gops->gr.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb; 1669 gops->gr.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb;
2046 gops->gr.commit_global_bundle_cb = gr_gv11b_commit_global_bundle_cb;
2047 gops->gr.handle_sw_method = gr_gv11b_handle_sw_method; 1670 gops->gr.handle_sw_method = gr_gv11b_handle_sw_method;
2048 gops->gr.bundle_cb_defaults = gr_gv11b_bundle_cb_defaults; 1671 gops->gr.bundle_cb_defaults = gr_gv11b_bundle_cb_defaults;
2049 gops->gr.cb_size_default = gr_gv11b_cb_size_default; 1672 gops->gr.cb_size_default = gr_gv11b_cb_size_default;
@@ -2051,11 +1674,6 @@ void gv11b_init_gr(struct gpu_ops *gops)
2051 gr_gv11b_set_alpha_circular_buffer_size; 1674 gr_gv11b_set_alpha_circular_buffer_size;
2052 gops->gr.set_circular_buffer_size = 1675 gops->gr.set_circular_buffer_size =
2053 gr_gv11b_set_circular_buffer_size; 1676 gr_gv11b_set_circular_buffer_size;
2054 gops->gr.init_ctx_state = gr_gv11b_init_ctx_state;
2055 gops->gr.alloc_gr_ctx = gr_gv11b_alloc_gr_ctx;
2056 gops->gr.free_gr_ctx = gr_gv11b_free_gr_ctx;
2057 gops->gr.update_ctxsw_preemption_mode =
2058 gr_gv11b_update_ctxsw_preemption_mode;
2059 gops->gr.dump_gr_regs = gr_gv11b_dump_gr_status_regs; 1677 gops->gr.dump_gr_regs = gr_gv11b_dump_gr_status_regs;
2060 gops->gr.wait_empty = gr_gv11b_wait_empty; 1678 gops->gr.wait_empty = gr_gv11b_wait_empty;
2061 gops->gr.init_cyclestats = gr_gv11b_init_cyclestats; 1679 gops->gr.init_cyclestats = gr_gv11b_init_cyclestats;