diff options
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 502 |
1 files changed, 60 insertions, 442 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 7638baaf..b8993052 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -73,6 +73,41 @@ static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) | |||
73 | return valid; | 73 | return valid; |
74 | } | 74 | } |
75 | 75 | ||
76 | static bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num) | ||
77 | { | ||
78 | bool valid = false; | ||
79 | |||
80 | switch (class_num) { | ||
81 | case VOLTA_A: | ||
82 | case PASCAL_A: | ||
83 | case MAXWELL_B: | ||
84 | valid = true; | ||
85 | break; | ||
86 | |||
87 | default: | ||
88 | break; | ||
89 | } | ||
90 | return valid; | ||
91 | } | ||
92 | |||
93 | static bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num) | ||
94 | { | ||
95 | bool valid = false; | ||
96 | |||
97 | switch (class_num) { | ||
98 | case VOLTA_COMPUTE_A: | ||
99 | case PASCAL_COMPUTE_A: | ||
100 | case MAXWELL_COMPUTE_B: | ||
101 | valid = true; | ||
102 | break; | ||
103 | |||
104 | default: | ||
105 | break; | ||
106 | } | ||
107 | return valid; | ||
108 | } | ||
109 | |||
110 | |||
76 | static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, | 111 | static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, |
77 | bool *post_event, struct channel_gk20a *fault_ch, | 112 | bool *post_event, struct channel_gk20a *fault_ch, |
78 | u32 *hww_global_esr) | 113 | u32 *hww_global_esr) |
@@ -113,122 +148,6 @@ static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
113 | return 0; | 148 | return 0; |
114 | } | 149 | } |
115 | 150 | ||
116 | static int gr_gv11b_commit_global_cb_manager(struct gk20a *g, | ||
117 | struct channel_gk20a *c, bool patch) | ||
118 | { | ||
119 | struct gr_gk20a *gr = &g->gr; | ||
120 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
121 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | ||
122 | u32 attrib_offset_in_chunk = 0; | ||
123 | u32 alpha_offset_in_chunk = 0; | ||
124 | u32 pd_ab_max_output; | ||
125 | u32 gpc_index, ppc_index; | ||
126 | u32 temp, temp2; | ||
127 | u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate; | ||
128 | u32 attrib_size_in_chunk, cb_attrib_cache_size_init; | ||
129 | |||
130 | gk20a_dbg_fn(""); | ||
131 | |||
132 | if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) { | ||
133 | attrib_size_in_chunk = gr->attrib_cb_default_size + | ||
134 | (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - | ||
135 | gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); | ||
136 | cb_attrib_cache_size_init = gr->attrib_cb_default_size + | ||
137 | (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - | ||
138 | gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); | ||
139 | } else { | ||
140 | attrib_size_in_chunk = gr->attrib_cb_size; | ||
141 | cb_attrib_cache_size_init = gr->attrib_cb_default_size; | ||
142 | } | ||
143 | |||
144 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(), | ||
145 | gr->attrib_cb_default_size, patch); | ||
146 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_alpha_r(), | ||
147 | gr->alpha_cb_default_size, patch); | ||
148 | |||
149 | pd_ab_max_output = (gr->alpha_cb_default_size * | ||
150 | gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) / | ||
151 | gr_pd_ab_dist_cfg1_max_output_granularity_v(); | ||
152 | |||
153 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), | ||
154 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | | ||
155 | gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); | ||
156 | |||
157 | attrib_offset_in_chunk = alpha_offset_in_chunk + | ||
158 | gr->tpc_count * gr->alpha_cb_size; | ||
159 | |||
160 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
161 | temp = proj_gpc_stride_v() * gpc_index; | ||
162 | temp2 = proj_scal_litter_num_pes_per_gpc_v() * gpc_index; | ||
163 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
164 | ppc_index++) { | ||
165 | cbm_cfg_size_beta = cb_attrib_cache_size_init * | ||
166 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
167 | cbm_cfg_size_alpha = gr->alpha_cb_default_size * | ||
168 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
169 | cbm_cfg_size_steadystate = gr->attrib_cb_default_size * | ||
170 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
171 | |||
172 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
173 | gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + | ||
174 | proj_ppc_in_gpc_stride_v() * ppc_index, | ||
175 | cbm_cfg_size_beta, patch); | ||
176 | |||
177 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
178 | gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + | ||
179 | proj_ppc_in_gpc_stride_v() * ppc_index, | ||
180 | attrib_offset_in_chunk, patch); | ||
181 | |||
182 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
183 | gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp + | ||
184 | proj_ppc_in_gpc_stride_v() * ppc_index, | ||
185 | cbm_cfg_size_steadystate, | ||
186 | patch); | ||
187 | |||
188 | attrib_offset_in_chunk += attrib_size_in_chunk * | ||
189 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
190 | |||
191 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
192 | gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + | ||
193 | proj_ppc_in_gpc_stride_v() * ppc_index, | ||
194 | cbm_cfg_size_alpha, patch); | ||
195 | |||
196 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
197 | gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + | ||
198 | proj_ppc_in_gpc_stride_v() * ppc_index, | ||
199 | alpha_offset_in_chunk, patch); | ||
200 | |||
201 | alpha_offset_in_chunk += gr->alpha_cb_size * | ||
202 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
203 | |||
204 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
205 | gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), | ||
206 | gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate), | ||
207 | patch); | ||
208 | } | ||
209 | } | ||
210 | |||
211 | return 0; | ||
212 | } | ||
213 | |||
214 | static void gr_gv11b_commit_global_pagepool(struct gk20a *g, | ||
215 | struct channel_ctx_gk20a *ch_ctx, | ||
216 | u64 addr, u32 size, bool patch) | ||
217 | { | ||
218 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(), | ||
219 | gr_scc_pagepool_base_addr_39_8_f(addr), patch); | ||
220 | |||
221 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(), | ||
222 | gr_scc_pagepool_total_pages_f(size) | | ||
223 | gr_scc_pagepool_valid_true_f(), patch); | ||
224 | |||
225 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(), | ||
226 | gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); | ||
227 | |||
228 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(), | ||
229 | gr_gpcs_gcc_pagepool_total_pages_f(size), patch); | ||
230 | } | ||
231 | |||
232 | static int gr_gv11b_zbc_s_query_table(struct gk20a *g, struct gr_gk20a *gr, | 151 | static int gr_gv11b_zbc_s_query_table(struct gk20a *g, struct gr_gk20a *gr, |
233 | struct zbc_query_params *query_params) | 152 | struct zbc_query_params *query_params) |
234 | { | 153 | { |
@@ -604,42 +523,6 @@ static void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
604 | } | 523 | } |
605 | } | 524 | } |
606 | 525 | ||
607 | static int gr_gv11b_init_ctx_state(struct gk20a *g) | ||
608 | { | ||
609 | struct fecs_method_op_gk20a op = { | ||
610 | .mailbox = { .id = 0, .data = 0, | ||
611 | .clr = ~0, .ok = 0, .fail = 0}, | ||
612 | .method.data = 0, | ||
613 | .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL, | ||
614 | .cond.fail = GR_IS_UCODE_OP_SKIP, | ||
615 | }; | ||
616 | int err; | ||
617 | |||
618 | gk20a_dbg_fn(""); | ||
619 | |||
620 | err = gr_gk20a_init_ctx_state(g); | ||
621 | if (err) | ||
622 | return err; | ||
623 | |||
624 | if (!g->gr.t18x.ctx_vars.preempt_image_size) { | ||
625 | op.method.addr = | ||
626 | gr_fecs_method_push_adr_discover_preemption_image_size_v(); | ||
627 | op.mailbox.ret = &g->gr.t18x.ctx_vars.preempt_image_size; | ||
628 | err = gr_gk20a_submit_fecs_method_op(g, op, false); | ||
629 | if (err) { | ||
630 | nvgpu_err(g, "query preempt image size failed"); | ||
631 | return err; | ||
632 | } | ||
633 | } | ||
634 | |||
635 | gk20a_dbg_info("preempt image size: %u", | ||
636 | g->gr.t18x.ctx_vars.preempt_image_size); | ||
637 | |||
638 | gk20a_dbg_fn("done"); | ||
639 | |||
640 | return 0; | ||
641 | } | ||
642 | |||
643 | int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, | 526 | int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, |
644 | struct nvgpu_mem *mem) | 527 | struct nvgpu_mem *mem) |
645 | { | 528 | { |
@@ -671,253 +554,6 @@ fail_free: | |||
671 | return err; | 554 | return err; |
672 | } | 555 | } |
673 | 556 | ||
674 | static int gr_gv11b_alloc_gr_ctx(struct gk20a *g, | ||
675 | struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, | ||
676 | u32 class, | ||
677 | u32 flags) | ||
678 | { | ||
679 | int err; | ||
680 | |||
681 | gk20a_dbg_fn(""); | ||
682 | |||
683 | err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags); | ||
684 | if (err) | ||
685 | return err; | ||
686 | |||
687 | (*gr_ctx)->t18x.ctx_id_valid = false; | ||
688 | |||
689 | if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp) | ||
690 | flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP; | ||
691 | |||
692 | if (class == PASCAL_COMPUTE_A && | ||
693 | g->gr.t18x.ctx_vars.force_preemption_cilp) | ||
694 | flags |= NVGPU_ALLOC_OBJ_FLAGS_CILP; | ||
695 | |||
696 | if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) { | ||
697 | u32 spill_size = | ||
698 | gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() * | ||
699 | gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); | ||
700 | u32 pagepool_size = g->ops.gr.pagepool_default_size(g) * | ||
701 | gr_scc_pagepool_total_pages_byte_granularity_v(); | ||
702 | u32 betacb_size = g->gr.attrib_cb_default_size + | ||
703 | (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - | ||
704 | gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); | ||
705 | u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) * | ||
706 | gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * | ||
707 | g->gr.max_tpc_count; | ||
708 | attrib_cb_size = ALIGN(attrib_cb_size, 128); | ||
709 | |||
710 | gk20a_dbg_info("gfxp context spill_size=%d", spill_size); | ||
711 | gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size); | ||
712 | gk20a_dbg_info("gfxp context attrib_cb_size=%d", | ||
713 | attrib_cb_size); | ||
714 | err = gr_gv11b_alloc_buffer(vm, | ||
715 | g->gr.t18x.ctx_vars.preempt_image_size, | ||
716 | &(*gr_ctx)->t18x.preempt_ctxsw_buffer); | ||
717 | if (err) { | ||
718 | nvgpu_err(vm->mm->g, "cannot allocate preempt buffer"); | ||
719 | goto fail_free_gk20a_ctx; | ||
720 | } | ||
721 | |||
722 | err = gr_gv11b_alloc_buffer(vm, | ||
723 | spill_size, | ||
724 | &(*gr_ctx)->t18x.spill_ctxsw_buffer); | ||
725 | if (err) { | ||
726 | nvgpu_err(vm->mm->g, "cannot allocate spill buffer"); | ||
727 | goto fail_free_preempt; | ||
728 | } | ||
729 | |||
730 | err = gr_gv11b_alloc_buffer(vm, | ||
731 | attrib_cb_size, | ||
732 | &(*gr_ctx)->t18x.betacb_ctxsw_buffer); | ||
733 | if (err) { | ||
734 | nvgpu_err(vm->mm->g, "cannot allocate beta buffer"); | ||
735 | goto fail_free_spill; | ||
736 | } | ||
737 | |||
738 | err = gr_gv11b_alloc_buffer(vm, | ||
739 | pagepool_size, | ||
740 | &(*gr_ctx)->t18x.pagepool_ctxsw_buffer); | ||
741 | if (err) { | ||
742 | nvgpu_err(vm->mm->g, "cannot allocate page pool"); | ||
743 | goto fail_free_betacb; | ||
744 | } | ||
745 | |||
746 | (*gr_ctx)->graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; | ||
747 | } | ||
748 | |||
749 | if (class == PASCAL_COMPUTE_A) { | ||
750 | if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) | ||
751 | (*gr_ctx)->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP; | ||
752 | else | ||
753 | (*gr_ctx)->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CTA; | ||
754 | } | ||
755 | |||
756 | gk20a_dbg_fn("done"); | ||
757 | |||
758 | return err; | ||
759 | |||
760 | fail_free_betacb: | ||
761 | nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.betacb_ctxsw_buffer); | ||
762 | fail_free_spill: | ||
763 | nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.spill_ctxsw_buffer); | ||
764 | fail_free_preempt: | ||
765 | nvgpu_dma_unmap_free(vm, &(*gr_ctx)->t18x.preempt_ctxsw_buffer); | ||
766 | fail_free_gk20a_ctx: | ||
767 | gr_gk20a_free_gr_ctx(g, vm, *gr_ctx); | ||
768 | *gr_ctx = NULL; | ||
769 | |||
770 | return err; | ||
771 | } | ||
772 | |||
773 | static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm, | ||
774 | struct gr_ctx_desc *gr_ctx) | ||
775 | { | ||
776 | struct nvgpu_mem *mem = &gr_ctx->mem; | ||
777 | |||
778 | if (nvgpu_mem_begin(g, mem)) { | ||
779 | WARN_ON("Cannot map context"); | ||
780 | return; | ||
781 | } | ||
782 | nvgpu_err(g, "ctxsw_prog_main_image_magic_value_o : %x (expect %x)", | ||
783 | nvgpu_mem_rd(g, mem, | ||
784 | ctxsw_prog_main_image_magic_value_o()), | ||
785 | ctxsw_prog_main_image_magic_value_v_value_v()); | ||
786 | |||
787 | |||
788 | nvgpu_err(g, "NUM_SAVE_OPERATIONS : %d", | ||
789 | nvgpu_mem_rd(g, mem, | ||
790 | ctxsw_prog_main_image_num_save_ops_o())); | ||
791 | nvgpu_err(g, "WFI_SAVE_OPERATIONS : %d", | ||
792 | nvgpu_mem_rd(g, mem, | ||
793 | ctxsw_prog_main_image_num_wfi_save_ops_o())); | ||
794 | nvgpu_err(g, "CTA_SAVE_OPERATIONS : %d", | ||
795 | nvgpu_mem_rd(g, mem, | ||
796 | ctxsw_prog_main_image_num_cta_save_ops_o())); | ||
797 | nvgpu_err(g, "GFXP_SAVE_OPERATIONS : %d", | ||
798 | nvgpu_mem_rd(g, mem, | ||
799 | ctxsw_prog_main_image_num_gfxp_save_ops_o())); | ||
800 | nvgpu_err(g, "CILP_SAVE_OPERATIONS : %d", | ||
801 | nvgpu_mem_rd(g, mem, | ||
802 | ctxsw_prog_main_image_num_cilp_save_ops_o())); | ||
803 | nvgpu_err(g, "image gfx preemption option (GFXP is 1) %x", | ||
804 | nvgpu_mem_rd(g, mem, | ||
805 | ctxsw_prog_main_image_graphics_preemption_options_o())); | ||
806 | nvgpu_mem_end(g, mem); | ||
807 | } | ||
808 | |||
809 | static void gr_gv11b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | ||
810 | struct gr_ctx_desc *gr_ctx) | ||
811 | { | ||
812 | gk20a_dbg_fn(""); | ||
813 | |||
814 | if (!gr_ctx) | ||
815 | return; | ||
816 | |||
817 | if (g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close) | ||
818 | dump_ctx_switch_stats(g, vm, gr_ctx); | ||
819 | |||
820 | nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer); | ||
821 | nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer); | ||
822 | nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer); | ||
823 | nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer); | ||
824 | gr_gk20a_free_gr_ctx(g, vm, gr_ctx); | ||
825 | gk20a_dbg_fn("done"); | ||
826 | } | ||
827 | |||
828 | |||
829 | static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, | ||
830 | struct channel_ctx_gk20a *ch_ctx, | ||
831 | struct nvgpu_mem *mem) | ||
832 | { | ||
833 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | ||
834 | u32 gfxp_preempt_option = | ||
835 | ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); | ||
836 | u32 cilp_preempt_option = | ||
837 | ctxsw_prog_main_image_compute_preemption_options_control_cilp_f(); | ||
838 | int err; | ||
839 | |||
840 | gk20a_dbg_fn(""); | ||
841 | |||
842 | if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) { | ||
843 | gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); | ||
844 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_graphics_preemption_options_o(), | ||
845 | gfxp_preempt_option); | ||
846 | } | ||
847 | |||
848 | if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) { | ||
849 | gk20a_dbg_info("CILP: %x", cilp_preempt_option); | ||
850 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_compute_preemption_options_o(), | ||
851 | cilp_preempt_option); | ||
852 | } | ||
853 | |||
854 | if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) { | ||
855 | u32 addr; | ||
856 | u32 size; | ||
857 | u32 cbes_reserve; | ||
858 | |||
859 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_full_preemption_ptr_o(), | ||
860 | gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8); | ||
861 | |||
862 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | ||
863 | if (err) { | ||
864 | nvgpu_err(g, "can't map patch context"); | ||
865 | goto out; | ||
866 | } | ||
867 | |||
868 | addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >> | ||
869 | gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) | | ||
870 | (u64_hi32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) << | ||
871 | (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); | ||
872 | |||
873 | gk20a_dbg_info("attrib cb addr : 0x%016x", addr); | ||
874 | g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true); | ||
875 | |||
876 | addr = (u64_lo32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) >> | ||
877 | gr_scc_pagepool_base_addr_39_8_align_bits_v()) | | ||
878 | (u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) << | ||
879 | (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v())); | ||
880 | size = gr_ctx->t18x.pagepool_ctxsw_buffer.size; | ||
881 | |||
882 | if (size == g->ops.gr.pagepool_default_size(g)) | ||
883 | size = gr_scc_pagepool_total_pages_hwmax_v(); | ||
884 | |||
885 | g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true); | ||
886 | |||
887 | addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >> | ||
888 | gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) | | ||
889 | (u64_hi32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) << | ||
890 | (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v())); | ||
891 | size = gr_ctx->t18x.spill_ctxsw_buffer.size / | ||
892 | gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); | ||
893 | |||
894 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
895 | gr_gpc0_swdx_rm_spill_buffer_addr_r(), | ||
896 | gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), | ||
897 | true); | ||
898 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
899 | gr_gpc0_swdx_rm_spill_buffer_size_r(), | ||
900 | gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), | ||
901 | true); | ||
902 | |||
903 | cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); | ||
904 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
905 | gr_gpcs_swdx_beta_cb_ctrl_r(), | ||
906 | gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( | ||
907 | cbes_reserve), | ||
908 | true); | ||
909 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
910 | gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), | ||
911 | gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( | ||
912 | cbes_reserve), | ||
913 | true); | ||
914 | |||
915 | gr_gk20a_ctx_patch_write_end(g, ch_ctx); | ||
916 | } | ||
917 | |||
918 | out: | ||
919 | gk20a_dbg_fn("done"); | ||
920 | } | ||
921 | 557 | ||
922 | static int gr_gv11b_dump_gr_status_regs(struct gk20a *g, | 558 | static int gr_gv11b_dump_gr_status_regs(struct gk20a *g, |
923 | struct gk20a_debug_output *o) | 559 | struct gk20a_debug_output *o) |
@@ -1145,41 +781,6 @@ static void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, | |||
1145 | gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); | 781 | gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); |
1146 | } | 782 | } |
1147 | 783 | ||
1148 | static void gr_gv11b_commit_global_bundle_cb(struct gk20a *g, | ||
1149 | struct channel_ctx_gk20a *ch_ctx, | ||
1150 | u64 addr, u64 size, bool patch) | ||
1151 | { | ||
1152 | u32 data; | ||
1153 | |||
1154 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), | ||
1155 | gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); | ||
1156 | |||
1157 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), | ||
1158 | gr_scc_bundle_cb_size_div_256b_f(size) | | ||
1159 | gr_scc_bundle_cb_size_valid_true_f(), patch); | ||
1160 | |||
1161 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), | ||
1162 | gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); | ||
1163 | |||
1164 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), | ||
1165 | gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | | ||
1166 | gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); | ||
1167 | |||
1168 | /* data for state_limit */ | ||
1169 | data = (g->gr.bundle_cb_default_size * | ||
1170 | gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) / | ||
1171 | gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(); | ||
1172 | |||
1173 | data = min_t(u32, data, g->gr.min_gpm_fifo_depth); | ||
1174 | |||
1175 | gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", | ||
1176 | g->gr.bundle_cb_token_limit, data); | ||
1177 | |||
1178 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), | ||
1179 | gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | | ||
1180 | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); | ||
1181 | } | ||
1182 | |||
1183 | static int gr_gv11b_init_fs_state(struct gk20a *g) | 784 | static int gr_gv11b_init_fs_state(struct gk20a *g) |
1184 | { | 785 | { |
1185 | return gr_gp10b_init_fs_state(g); | 786 | return gr_gp10b_init_fs_state(g); |
@@ -2025,6 +1626,28 @@ static void gr_gv11b_load_tpc_mask(struct gk20a *g) | |||
2025 | 1626 | ||
2026 | } | 1627 | } |
2027 | 1628 | ||
1629 | static void gr_gv11b_write_preemption_ptr(struct gk20a *g, | ||
1630 | struct nvgpu_mem *mem, u64 gpu_va) | ||
1631 | { | ||
1632 | u32 addr_lo, addr_hi; | ||
1633 | |||
1634 | addr_lo = u64_lo32(gpu_va); | ||
1635 | addr_hi = u64_hi32(gpu_va); | ||
1636 | |||
1637 | nvgpu_mem_wr(g, mem, | ||
1638 | ctxsw_prog_main_image_full_preemption_ptr_o(), addr_lo); | ||
1639 | nvgpu_mem_wr(g, mem, | ||
1640 | ctxsw_prog_main_image_full_preemption_ptr_hi_o(), addr_hi); | ||
1641 | |||
1642 | nvgpu_mem_wr(g, mem, | ||
1643 | ctxsw_prog_main_image_full_preemption_ptr_veid0_o(), addr_lo); | ||
1644 | nvgpu_mem_wr(g, mem, | ||
1645 | ctxsw_prog_main_image_full_preemption_ptr_veid0_hi_o(), | ||
1646 | addr_hi); | ||
1647 | |||
1648 | } | ||
1649 | |||
1650 | |||
2028 | void gv11b_init_gr(struct gpu_ops *gops) | 1651 | void gv11b_init_gr(struct gpu_ops *gops) |
2029 | { | 1652 | { |
2030 | gp10b_init_gr(gops); | 1653 | gp10b_init_gr(gops); |
@@ -2032,8 +1655,9 @@ void gv11b_init_gr(struct gpu_ops *gops) | |||
2032 | gops->gr.init_fs_state = gr_gv11b_init_fs_state; | 1655 | gops->gr.init_fs_state = gr_gv11b_init_fs_state; |
2033 | gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch; | 1656 | gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch; |
2034 | gops->gr.is_valid_class = gr_gv11b_is_valid_class; | 1657 | gops->gr.is_valid_class = gr_gv11b_is_valid_class; |
2035 | gops->gr.commit_global_cb_manager = gr_gv11b_commit_global_cb_manager; | 1658 | gops->gr.is_valid_gfx_class = gr_gv11b_is_valid_gfx_class; |
2036 | gops->gr.commit_global_pagepool = gr_gv11b_commit_global_pagepool; | 1659 | gops->gr.is_valid_compute_class = gr_gv11b_is_valid_compute_class; |
1660 | gops->gr.write_preemption_ptr = gr_gv11b_write_preemption_ptr; | ||
2037 | gops->gr.add_zbc_s = gr_gv11b_add_zbc_stencil; | 1661 | gops->gr.add_zbc_s = gr_gv11b_add_zbc_stencil; |
2038 | gops->gr.load_zbc_s_default_tbl = gr_gv11b_load_stencil_default_tbl; | 1662 | gops->gr.load_zbc_s_default_tbl = gr_gv11b_load_stencil_default_tbl; |
2039 | gops->gr.load_zbc_s_tbl = gr_gv11b_load_stencil_tbl; | 1663 | gops->gr.load_zbc_s_tbl = gr_gv11b_load_stencil_tbl; |
@@ -2043,7 +1667,6 @@ void gv11b_init_gr(struct gpu_ops *gops) | |||
2043 | gops->gr.calc_global_ctx_buffer_size = | 1667 | gops->gr.calc_global_ctx_buffer_size = |
2044 | gr_gv11b_calc_global_ctx_buffer_size; | 1668 | gr_gv11b_calc_global_ctx_buffer_size; |
2045 | gops->gr.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb; | 1669 | gops->gr.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb; |
2046 | gops->gr.commit_global_bundle_cb = gr_gv11b_commit_global_bundle_cb; | ||
2047 | gops->gr.handle_sw_method = gr_gv11b_handle_sw_method; | 1670 | gops->gr.handle_sw_method = gr_gv11b_handle_sw_method; |
2048 | gops->gr.bundle_cb_defaults = gr_gv11b_bundle_cb_defaults; | 1671 | gops->gr.bundle_cb_defaults = gr_gv11b_bundle_cb_defaults; |
2049 | gops->gr.cb_size_default = gr_gv11b_cb_size_default; | 1672 | gops->gr.cb_size_default = gr_gv11b_cb_size_default; |
@@ -2051,11 +1674,6 @@ void gv11b_init_gr(struct gpu_ops *gops) | |||
2051 | gr_gv11b_set_alpha_circular_buffer_size; | 1674 | gr_gv11b_set_alpha_circular_buffer_size; |
2052 | gops->gr.set_circular_buffer_size = | 1675 | gops->gr.set_circular_buffer_size = |
2053 | gr_gv11b_set_circular_buffer_size; | 1676 | gr_gv11b_set_circular_buffer_size; |
2054 | gops->gr.init_ctx_state = gr_gv11b_init_ctx_state; | ||
2055 | gops->gr.alloc_gr_ctx = gr_gv11b_alloc_gr_ctx; | ||
2056 | gops->gr.free_gr_ctx = gr_gv11b_free_gr_ctx; | ||
2057 | gops->gr.update_ctxsw_preemption_mode = | ||
2058 | gr_gv11b_update_ctxsw_preemption_mode; | ||
2059 | gops->gr.dump_gr_regs = gr_gv11b_dump_gr_status_regs; | 1677 | gops->gr.dump_gr_regs = gr_gv11b_dump_gr_status_regs; |
2060 | gops->gr.wait_empty = gr_gv11b_wait_empty; | 1678 | gops->gr.wait_empty = gr_gv11b_wait_empty; |
2061 | gops->gr.init_cyclestats = gr_gv11b_init_cyclestats; | 1679 | gops->gr.init_cyclestats = gr_gv11b_init_cyclestats; |