diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/gr_gm20b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 1527 |
1 files changed, 1527 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c new file mode 100644 index 00000000..ef46c1ee --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -0,0 +1,1527 @@ | |||
1 | /* | ||
2 | * GM20B GPC MMU | ||
3 | * | ||
4 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <uapi/linux/nvgpu.h> | ||
26 | |||
27 | #include <nvgpu/kmem.h> | ||
28 | #include <nvgpu/log.h> | ||
29 | #include <nvgpu/enabled.h> | ||
30 | #include <nvgpu/debug.h> | ||
31 | #include <nvgpu/fuse.h> | ||
32 | |||
33 | #include "gk20a/gk20a.h" | ||
34 | #include "gk20a/gr_gk20a.h" | ||
35 | #include "common/linux/os_linux.h" | ||
36 | |||
37 | #include "gr_gm20b.h" | ||
38 | #include "pmu_gm20b.h" | ||
39 | |||
40 | #include <nvgpu/hw/gm20b/hw_gr_gm20b.h> | ||
41 | #include <nvgpu/hw/gm20b/hw_fifo_gm20b.h> | ||
42 | #include <nvgpu/hw/gm20b/hw_fb_gm20b.h> | ||
43 | #include <nvgpu/hw/gm20b/hw_top_gm20b.h> | ||
44 | #include <nvgpu/hw/gm20b/hw_ltc_gm20b.h> | ||
45 | #include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h> | ||
46 | #include <nvgpu/hw/gm20b/hw_fuse_gm20b.h> | ||
47 | |||
/*
 * Initialize the GPC-side MMU by mirroring the FB MMU configuration.
 * Must run before channels touch GPC virtual memory.
 */
void gr_gm20b_init_gpc_mmu(struct gk20a *g)
{
	u32 temp;

	gk20a_dbg_info("initialize gpc mmu");

	if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
		/* Bypass MMU check for non-secure boot. For
		 * secure-boot, this register write has no effect. */
		gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff);
	}
	/* Copy the FB MMU control word, keeping only the fields that exist
	 * in the GPC-side control register. */
	temp = gk20a_readl(g, fb_mmu_ctrl_r());
	temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() |
		gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() |
		gr_gpcs_pri_mmu_ctrl_use_full_comp_tag_line_m() |
		gr_gpcs_pri_mmu_ctrl_vol_fault_m() |
		gr_gpcs_pri_mmu_ctrl_comp_fault_m() |
		gr_gpcs_pri_mmu_ctrl_miss_gran_m() |
		gr_gpcs_pri_mmu_ctrl_cache_mode_m() |
		gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |
		gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |
		gr_gpcs_pri_mmu_ctrl_mmu_disable_m();
	gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp);
	/* Clear the PM unit/request masks. */
	gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);
	gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);

	/* Propagate FB-side MMU debug ctrl/wr/rd settings to the GPC MMU. */
	gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(),
			gk20a_readl(g, fb_mmu_debug_ctrl_r()));
	gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(),
			gk20a_readl(g, fb_mmu_debug_wr_r()));
	gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(),
			gk20a_readl(g, fb_mmu_debug_rd_r()));

	/* GPC MMU must agree with the FBHUB on the active LTC count. */
	gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(),
			gk20a_readl(g, fb_fbhub_num_active_ltcs_r()));
}
84 | |||
85 | void gr_gm20b_bundle_cb_defaults(struct gk20a *g) | ||
86 | { | ||
87 | struct gr_gk20a *gr = &g->gr; | ||
88 | |||
89 | gr->bundle_cb_default_size = | ||
90 | gr_scc_bundle_cb_size_div_256b__prod_v(); | ||
91 | gr->min_gpm_fifo_depth = | ||
92 | gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); | ||
93 | gr->bundle_cb_token_limit = | ||
94 | gr_pd_ab_dist_cfg2_token_limit_init_v(); | ||
95 | } | ||
96 | |||
97 | void gr_gm20b_cb_size_default(struct gk20a *g) | ||
98 | { | ||
99 | struct gr_gk20a *gr = &g->gr; | ||
100 | |||
101 | if (!gr->attrib_cb_default_size) | ||
102 | gr->attrib_cb_default_size = | ||
103 | gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); | ||
104 | gr->alpha_cb_default_size = | ||
105 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); | ||
106 | } | ||
107 | |||
108 | int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) | ||
109 | { | ||
110 | struct gr_gk20a *gr = &g->gr; | ||
111 | int size; | ||
112 | |||
113 | gr->attrib_cb_size = gr->attrib_cb_default_size | ||
114 | + (gr->attrib_cb_default_size >> 1); | ||
115 | gr->alpha_cb_size = gr->alpha_cb_default_size | ||
116 | + (gr->alpha_cb_default_size >> 1); | ||
117 | |||
118 | size = gr->attrib_cb_size * | ||
119 | gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * | ||
120 | gr->max_tpc_count; | ||
121 | |||
122 | size += gr->alpha_cb_size * | ||
123 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * | ||
124 | gr->max_tpc_count; | ||
125 | |||
126 | return size; | ||
127 | } | ||
128 | |||
/*
 * Patch the attribute circular-buffer base address (bits 39:12 of the
 * GPU VA, per the _addr_39_12_f accessor) into the three consuming
 * units (SETUP, PE pin CB, MPC VTG), marking each valid.
 */
void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
			struct channel_ctx_gk20a *ch_ctx,
			u64 addr, bool patch)
{
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(),
		gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) |
		gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(),
		gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) |
		gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
}
145 | |||
146 | void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, | ||
147 | struct channel_ctx_gk20a *ch_ctx, | ||
148 | u64 addr, u64 size, bool patch) | ||
149 | { | ||
150 | u32 data; | ||
151 | |||
152 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), | ||
153 | gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); | ||
154 | |||
155 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), | ||
156 | gr_scc_bundle_cb_size_div_256b_f(size) | | ||
157 | gr_scc_bundle_cb_size_valid_true_f(), patch); | ||
158 | |||
159 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), | ||
160 | gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); | ||
161 | |||
162 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), | ||
163 | gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | | ||
164 | gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); | ||
165 | |||
166 | /* data for state_limit */ | ||
167 | data = (g->gr.bundle_cb_default_size * | ||
168 | gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) / | ||
169 | gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(); | ||
170 | |||
171 | data = min_t(u32, data, g->gr.min_gpm_fifo_depth); | ||
172 | |||
173 | gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", | ||
174 | g->gr.bundle_cb_token_limit, data); | ||
175 | |||
176 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), | ||
177 | gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | | ||
178 | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); | ||
179 | |||
180 | } | ||
181 | |||
/*
 * Program per-PPC beta/alpha circular-buffer sizes and chunk offsets
 * for a channel's context, plus the DS constraint logic and PD
 * alpha-beta distribution limits. Returns 0.
 */
int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
			struct channel_gk20a *c, bool patch)
{
	struct gr_gk20a *gr = &g->gr;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	u32 attrib_offset_in_chunk = 0;
	u32 alpha_offset_in_chunk = 0;
	u32 pd_ab_max_output;
	u32 gpc_index, ppc_index;
	u32 cbm_cfg_size1, cbm_cfg_size2;
	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
	u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
			GPU_LIT_NUM_PES_PER_GPC);

	gk20a_dbg_fn("");

	/* Constraint logic gets the default per-TPC beta/alpha CB sizes. */
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(),
		gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
		gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
		patch);

	/* PD max output, expressed in PD output granularity units. */
	pd_ab_max_output = (gr->alpha_cb_default_size *
		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
		gr_pd_ab_dist_cfg1_max_output_granularity_v();

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);

	/* Alpha CBs are laid out after all attrib (beta) CBs in the chunk. */
	alpha_offset_in_chunk = attrib_offset_in_chunk +
		gr->tpc_count * gr->attrib_cb_size;

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		u32 temp = gpc_stride * gpc_index;		/* GPC reg offset */
		u32 temp2 = num_pes_per_gpc * gpc_index;	/* SWDX PES index base */
		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
		     ppc_index++) {
			/* Per-PPC sizes scale with the TPCs behind the PES. */
			cbm_cfg_size1 = gr->attrib_cb_default_size *
				gr->pes_tpc_count[ppc_index][gpc_index];
			cbm_cfg_size2 = gr->alpha_cb_default_size *
				gr->pes_tpc_count[ppc_index][gpc_index];

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
				ppc_in_gpc_stride * ppc_index,
				cbm_cfg_size1, patch);

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
				ppc_in_gpc_stride * ppc_index,
				attrib_offset_in_chunk, patch);

			/* Advance the running beta-CB offset by the full
			 * (1.5x) per-PPC attrib size. */
			attrib_offset_in_chunk += gr->attrib_cb_size *
				gr->pes_tpc_count[ppc_index][gpc_index];

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
				ppc_in_gpc_stride * ppc_index,
				cbm_cfg_size2, patch);

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
				ppc_in_gpc_stride * ppc_index,
				alpha_offset_in_chunk, patch);

			alpha_offset_in_chunk += gr->alpha_cb_size *
				gr->pes_tpc_count[ppc_index][gpc_index];

			/* SWDX copy of the beta CB size, indexed per PES. */
			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
				gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1) |
				gr_gpcs_swdx_tc_beta_cb_size_div3_f(cbm_cfg_size1/3),
				patch);
		}
	}

	return 0;
}
261 | |||
262 | void gr_gm20b_commit_global_pagepool(struct gk20a *g, | ||
263 | struct channel_ctx_gk20a *ch_ctx, | ||
264 | u64 addr, u32 size, bool patch) | ||
265 | { | ||
266 | gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch); | ||
267 | |||
268 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_rm_pagepool_r(), | ||
269 | gr_gpcs_swdx_rm_pagepool_total_pages_f(size) | | ||
270 | gr_gpcs_swdx_rm_pagepool_valid_true_f(), patch); | ||
271 | |||
272 | } | ||
273 | |||
274 | void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data) | ||
275 | { | ||
276 | u32 val; | ||
277 | |||
278 | gk20a_dbg_fn(""); | ||
279 | |||
280 | val = gk20a_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r()); | ||
281 | val = set_field(val, gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_m(), | ||
282 | gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_f(data)); | ||
283 | gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), val); | ||
284 | |||
285 | gk20a_dbg_fn("done"); | ||
286 | } | ||
287 | |||
288 | int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, | ||
289 | u32 class_num, u32 offset, u32 data) | ||
290 | { | ||
291 | gk20a_dbg_fn(""); | ||
292 | |||
293 | if (class_num == MAXWELL_COMPUTE_B) { | ||
294 | switch (offset << 2) { | ||
295 | case NVB1C0_SET_SHADER_EXCEPTIONS: | ||
296 | gk20a_gr_set_shader_exceptions(g, data); | ||
297 | break; | ||
298 | case NVB1C0_SET_RD_COALESCE: | ||
299 | gr_gm20b_set_rd_coalesce(g, data); | ||
300 | break; | ||
301 | default: | ||
302 | goto fail; | ||
303 | } | ||
304 | } | ||
305 | |||
306 | if (class_num == MAXWELL_B) { | ||
307 | switch (offset << 2) { | ||
308 | case NVB197_SET_SHADER_EXCEPTIONS: | ||
309 | gk20a_gr_set_shader_exceptions(g, data); | ||
310 | break; | ||
311 | case NVB197_SET_CIRCULAR_BUFFER_SIZE: | ||
312 | g->ops.gr.set_circular_buffer_size(g, data); | ||
313 | break; | ||
314 | case NVB197_SET_ALPHA_CIRCULAR_BUFFER_SIZE: | ||
315 | g->ops.gr.set_alpha_circular_buffer_size(g, data); | ||
316 | break; | ||
317 | case NVB197_SET_RD_COALESCE: | ||
318 | gr_gm20b_set_rd_coalesce(g, data); | ||
319 | break; | ||
320 | default: | ||
321 | goto fail; | ||
322 | } | ||
323 | } | ||
324 | return 0; | ||
325 | |||
326 | fail: | ||
327 | return -EINVAL; | ||
328 | } | ||
329 | |||
/*
 * Runtime (non-ctx-patch) resize of the alpha circular buffer, driven
 * by the NVB197_SET_ALPHA_CIRCULAR_BUFFER_SIZE sw method. 'data' is in
 * 4-unit granularity; the result is clamped to gr->alpha_cb_size.
 */
void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_index, ppc_index, stride, val;
	u32 pd_ab_max_output;
	u32 alpha_cb_size = data * 4;
	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);

	gk20a_dbg_fn("");
	/* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
		return; */

	if (alpha_cb_size > gr->alpha_cb_size)
		alpha_cb_size = gr->alpha_cb_size;

	/* Replace only the alpha_cbsize field of the constraint logic. */
	gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
		(gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
		 ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
		gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));

	/* Recompute PD max output for the new alpha CB size. */
	pd_ab_max_output = alpha_cb_size *
		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() /
		gr_pd_ab_dist_cfg1_max_output_granularity_v();

	gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
		gr_pd_ab_dist_cfg1_max_batches_init_f());

	/* Update the per-PPC alpha CB size field in every GPC. */
	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		stride = gpc_stride * gpc_index;

		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
			ppc_index++) {

			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
				stride +
				ppc_in_gpc_stride * ppc_index);

			/* Size scales with the TPC count behind this PES. */
			val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
					gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
						gr->pes_tpc_count[ppc_index][gpc_index]));

			gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
				stride +
				ppc_in_gpc_stride * ppc_index, val);
		}
	}
}
379 | |||
/*
 * Runtime (non-ctx-patch) resize of the beta (attribute) circular
 * buffer, driven by the NVB197_SET_CIRCULAR_BUFFER_SIZE sw method.
 * 'data' is in 4-unit granularity; clamped to gr->attrib_cb_size.
 */
void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_index, ppc_index, stride, val;
	u32 cb_size = data * 4;
	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);

	gk20a_dbg_fn("");

	if (cb_size > gr->attrib_cb_size)
		cb_size = gr->attrib_cb_size;

	/* Replace only the beta_cbsize field of the constraint logic. */
	gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
		(gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
		 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
		gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		stride = gpc_stride * gpc_index;

		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
			ppc_index++) {

			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
				stride +
				ppc_in_gpc_stride * ppc_index);

			/* Size scales with the TPC count behind this PES. */
			val = set_field(val,
				gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
				gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
					gr->pes_tpc_count[ppc_index][gpc_index]));

			gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
				stride +
				ppc_in_gpc_stride * ppc_index, val);

			/* NOTE(review): SWDX register is indexed with
			 * ppc_index + gpc_index here, while
			 * gr_gm20b_commit_global_cb_manager uses
			 * ppc_index + num_pes_per_gpc * gpc_index. These
			 * agree only when there is one PES per GPC (true on
			 * gm20b) — confirm before reusing on other chips. */
			val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
						ppc_index + gpc_index));

			val = set_field(val,
				gr_gpcs_swdx_tc_beta_cb_size_v_m(),
				gr_gpcs_swdx_tc_beta_cb_size_v_f(cb_size *
					gr->gpc_ppc_count[gpc_index]));
			val = set_field(val,
				gr_gpcs_swdx_tc_beta_cb_size_div3_m(),
				gr_gpcs_swdx_tc_beta_cb_size_div3_f((cb_size *
					gr->gpc_ppc_count[gpc_index])/3));

			gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
						ppc_index + gpc_index), val);
		}
	}
}
434 | |||
/*
 * Select which SM hardware-warning (HWW) exception sources are
 * reported: one broadcast write for the per-warp ESR mask, one for the
 * SM-global ESR mask.
 */
void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g)
{
	/* setup sm warp esr report masks */
	gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_mmu_fault_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_overflow_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f());

	/* setup sm global esr report mask */
	gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
		gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
		gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f());
}
467 | |||
468 | bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) | ||
469 | { | ||
470 | bool valid = false; | ||
471 | |||
472 | switch (class_num) { | ||
473 | case MAXWELL_COMPUTE_B: | ||
474 | case MAXWELL_B: | ||
475 | case FERMI_TWOD_A: | ||
476 | case KEPLER_DMA_COPY_A: | ||
477 | case MAXWELL_DMA_COPY_A: | ||
478 | valid = true; | ||
479 | break; | ||
480 | |||
481 | default: | ||
482 | break; | ||
483 | } | ||
484 | |||
485 | return valid; | ||
486 | } | ||
487 | |||
488 | bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num) | ||
489 | { | ||
490 | if (class_num == MAXWELL_B) | ||
491 | return true; | ||
492 | else | ||
493 | return false; | ||
494 | } | ||
495 | |||
496 | bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num) | ||
497 | { | ||
498 | if (class_num == MAXWELL_COMPUTE_B) | ||
499 | return true; | ||
500 | else | ||
501 | return false; | ||
502 | } | ||
503 | |||
504 | |||
/* Following are the blocks of registers that the ucode
   stores in the extended region. */
/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */
/* Zero (implicitly) — gm20b exposes no SM DSM perf counter registers. */
static const u32 _num_sm_dsm_perf_regs;
/* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ? */
static const u32 _num_sm_dsm_perf_ctrl_regs = 2;
/* NULL (implicitly) — no perf-register table on gm20b. */
static u32 *_sm_dsm_perf_regs;
static u32 _sm_dsm_perf_ctrl_regs[2];

/*
 * Lazily fill the SM DSM perf-control register table. Idempotent: a
 * nonzero first entry means it has already been populated.
 */
void gr_gm20b_init_sm_dsm_reg_info(void)
{
	if (_sm_dsm_perf_ctrl_regs[0] != 0)
		return;

	_sm_dsm_perf_ctrl_regs[0] =
		gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r();
	_sm_dsm_perf_ctrl_regs[1] =
		gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r();
}
524 | |||
/*
 * Report the SM DSM perf-register table. On gm20b this is empty: count
 * is the zero-initialized const, the table pointer is NULL, stride 0.
 */
void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
				   u32 *num_sm_dsm_perf_regs,
				   u32 **sm_dsm_perf_regs,
				   u32 *perf_register_stride)
{
	*num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs;
	*sm_dsm_perf_regs = _sm_dsm_perf_regs;
	*perf_register_stride = 0;
}
534 | |||
/*
 * Report the SM DSM perf-control register table (2 entries, filled by
 * gr_gm20b_init_sm_dsm_reg_info) and its ctxsw extended-region stride.
 */
void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
					u32 *num_sm_dsm_perf_ctrl_regs,
					u32 **sm_dsm_perf_ctrl_regs,
					u32 *ctrl_register_stride)
{
	*num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs;
	*sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs;

	*ctrl_register_stride =
		ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
}
546 | |||
547 | u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | ||
548 | { | ||
549 | u32 val; | ||
550 | struct gr_gk20a *gr = &g->gr; | ||
551 | |||
552 | /* Toggle the bits of NV_FUSE_STATUS_OPT_TPC_GPC */ | ||
553 | val = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(gpc_index)); | ||
554 | |||
555 | return (~val) & ((0x1 << gr->max_tpc_per_gpc_count) - 1); | ||
556 | } | ||
557 | |||
558 | void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | ||
559 | { | ||
560 | nvgpu_tegra_fuse_write_bypass(g, 0x1); | ||
561 | nvgpu_tegra_fuse_write_access_sw(g, 0x0); | ||
562 | |||
563 | if (g->gr.gpc_tpc_mask[gpc_index] == 0x1) { | ||
564 | nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0); | ||
565 | nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x1); | ||
566 | } else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2) { | ||
567 | nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1); | ||
568 | nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x0); | ||
569 | } else { | ||
570 | nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0); | ||
571 | nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x0); | ||
572 | } | ||
573 | } | ||
574 | |||
/*
 * Program the front-end TPC floorsweeping register from the per-PES
 * TPC masks, optionally honouring a user-requested TPC mask.
 */
void gr_gm20b_load_tpc_mask(struct gk20a *g)
{
	u32 pes_tpc_mask = 0, fuse_tpc_mask;
	u32 gpc, pes;
	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);

	/* Gather the enabled-TPC mask across all GPCs/PES units, shifting
	 * each GPC's bits to its per-GPC position. */
	for (gpc = 0; gpc < g->gr.gpc_count; gpc++)
		for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) {
			pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] <<
					num_tpc_per_gpc * gpc;
		}

	fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0);
	/* Honour a user override only when it differs from the fuses and
	 * the fuses report all TPCs enabled. */
	if (g->tpc_fs_mask_user && g->tpc_fs_mask_user != fuse_tpc_mask &&
		fuse_tpc_mask == (0x1U << g->gr.max_tpc_count) - 1U) {
		u32 val = g->tpc_fs_mask_user;
		val &= (0x1U << g->gr.max_tpc_count) - 1U;
		/* skip tpc to disable the other tpc cause channel timeout */
		val = (0x1U << hweight32(val)) - 1U;
		gk20a_writel(g, gr_fe_tpc_fs_r(), val);
	} else {
		gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask);
	}
}
599 | |||
600 | void gr_gm20b_program_sm_id_numbering(struct gk20a *g, | ||
601 | u32 gpc, u32 tpc, u32 smid) | ||
602 | { | ||
603 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
604 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
605 | u32 gpc_offset = gpc_stride * gpc; | ||
606 | u32 tpc_offset = tpc_in_gpc_stride * tpc; | ||
607 | |||
608 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
609 | gr_gpc0_tpc0_sm_cfg_sm_id_f(smid)); | ||
610 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, | ||
611 | gr_gpc0_gpm_pd_sm_id_id_f(smid)); | ||
612 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
613 | gr_gpc0_tpc0_pe_cfg_smid_value_f(smid)); | ||
614 | } | ||
615 | |||
/*
 * Program the CWD sm_id <-> (gpc, tpc) mapping tables from
 * gr->sm_to_cluster. Returns 0 on success, -ENOMEM if the scratch
 * table cannot be allocated.
 */
int gr_gm20b_load_smid_config(struct gk20a *g)
{
	u32 *tpc_sm_id;
	u32 i, j;
	u32 tpc_index, gpc_index;

	tpc_sm_id = nvgpu_kcalloc(g, gr_cwd_sm_id__size_1_v(), sizeof(u32));
	if (!tpc_sm_id)
		return -ENOMEM;

	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
	for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) {
		u32 reg = 0;
		/* width of one (gpc, tpc) field within the register */
		u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
				 gr_cwd_gpc_tpc_id_tpc0_s();

		for (j = 0; j < 4; j++) {
			u32 sm_id = (i * 4) + j;
			u32 bits;

			if (sm_id >= g->gr.tpc_count)
				break;

			gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
			tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;

			/* forward table: sm_id slot j -> (gpc, tpc) */
			bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
			       gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
			reg |= bits << (j * bit_stride);

			/* reverse table: per-GPC word holding the sm_id of
			 * each TPC. NOTE(review): assumes tpc_index *
			 * bit_stride stays below 32 — confirm for
			 * configurations with many TPCs per GPC. */
			tpc_sm_id[gpc_index] |= sm_id << tpc_index * bit_stride;
		}
		gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
	}

	for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
		gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);

	nvgpu_kfree(g, tpc_sm_id);

	return 0;
}
658 | |||
/*
 * gm20b floorsweeping init: common gk20a state, TPC mask, LTC counts
 * for ZROP/CROP, CROP debug tweak, then the SM id mapping tables.
 * Returns 0 on success or the error from gr_gk20a_init_fs_state().
 */
int gr_gm20b_init_fs_state(struct gk20a *g)
{
	int err = 0;

	gk20a_dbg_fn("");

	err = gr_gk20a_init_fs_state(g);
	if (err)
		return err;

	g->ops.gr.load_tpc_mask(g);

	/* ZROP/CROP must know how many LTCs are active. */
	gk20a_writel(g, gr_bes_zrop_settings_r(),
		     gr_bes_zrop_settings_num_active_ltcs_f(g->ltc_count));
	gk20a_writel(g, gr_bes_crop_settings_r(),
		     gr_bes_crop_settings_num_active_ltcs_f(g->ltc_count));

	/* Read BE0's crop_debug3 and broadcast it back with the 4to2 VDC
	 * compression disable bit set. */
	gk20a_writel(g, gr_bes_crop_debug3_r(),
		     gk20a_readl(g, gr_be0_crop_debug3_r()) |
		     gr_bes_crop_debug3_comp_vdc_4to2_disable_m());

	g->ops.gr.load_smid_config(g);

	return err;
}
684 | |||
/*
 * Load a ctxsw falcon's bootloader (header + boot segments) into its
 * DMEM; reg_offset selects FECS (0) or rebases onto GPCCS. The falcon
 * is only started here when PRIV security is disabled (otherwise the
 * secure boot path starts it). Always returns 0.
 */
int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
	/* require_ctx = 0: falcon DMA need not be bound to a channel ctx */
	gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
			gr_fecs_dmactl_require_ctx_f(0));

	/* Copy falcon bootloader into dmem */
	gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset);
	gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset);

	/* start the falcon immediately if PRIV security is disabled */
	if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
		gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(),
				gr_fecs_cpuctl_startcpu_f(0x01));
	}

	return 0;
}
703 | |||
704 | static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr) | ||
705 | { | ||
706 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
707 | u32 tpc_in_gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE); | ||
708 | return (addr >= tpc_in_gpc_shared_base) && | ||
709 | (addr < (tpc_in_gpc_shared_base + | ||
710 | tpc_in_gpc_stride)); | ||
711 | } | ||
712 | |||
713 | bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr) | ||
714 | { | ||
715 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
716 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
717 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
718 | return ((addr >= tpc_in_gpc_base) && | ||
719 | (addr < tpc_in_gpc_base + | ||
720 | (num_tpc_per_gpc * tpc_in_gpc_stride))) | ||
721 | || gr_gm20b_is_tpc_addr_shared(g, addr); | ||
722 | } | ||
723 | |||
724 | u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr) | ||
725 | { | ||
726 | u32 i, start; | ||
727 | u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
728 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
729 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
730 | |||
731 | for (i = 0; i < num_tpcs; i++) { | ||
732 | start = tpc_in_gpc_base + (i * tpc_in_gpc_stride); | ||
733 | if ((addr >= start) && | ||
734 | (addr < (start + tpc_in_gpc_stride))) | ||
735 | return i; | ||
736 | } | ||
737 | return 0; | ||
738 | } | ||
739 | |||
740 | #ifdef CONFIG_TEGRA_ACR | ||
/*
 * Load the GPCCS falcon ucode via the generic (non-secure) bootloader
 * path: bind the ucode surface's instance block, then reuse the FECS
 * load routine rebased onto the GPCCS register space.
 */
static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g)
{
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	u64 addr_base = ucode_info->surface_desc.gpu_va;

	gr_gk20a_load_falcon_bind_instblk(g);

	/* reg_offset = distance from the FECS falcon register block to the
	 * GPCCS falcon register block */
	g->ops.gr.falcon_load_ucode(g, addr_base,
		&g->ctxsw_ucode_info.gpccs,
		gr_gpcs_gpccs_falcon_hwcfg_r() -
		gr_fecs_falcon_hwcfg_r());
}
753 | |||
754 | int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) | ||
755 | { | ||
756 | u32 err, flags; | ||
757 | u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() - | ||
758 | gr_fecs_falcon_hwcfg_r(); | ||
759 | u8 falcon_id_mask = 0; | ||
760 | |||
761 | gk20a_dbg_fn(""); | ||
762 | |||
763 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { | ||
764 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7), | ||
765 | gr_fecs_ctxsw_mailbox_value_f(0xc0de7777)); | ||
766 | gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7), | ||
767 | gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777)); | ||
768 | } | ||
769 | |||
770 | flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES; | ||
771 | g->pmu_lsf_loaded_falcon_id = 0; | ||
772 | if (nvgpu_is_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE)) { | ||
773 | /* this must be recovery so bootstrap fecs and gpccs */ | ||
774 | if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { | ||
775 | gr_gm20b_load_gpccs_with_bootloader(g); | ||
776 | err = g->ops.pmu.load_lsfalcon_ucode(g, | ||
777 | (1 << LSF_FALCON_ID_FECS)); | ||
778 | } else { | ||
779 | /* bind WPR VA inst block */ | ||
780 | gr_gk20a_load_falcon_bind_instblk(g); | ||
781 | err = g->ops.pmu.load_lsfalcon_ucode(g, | ||
782 | (1 << LSF_FALCON_ID_FECS) | | ||
783 | (1 << LSF_FALCON_ID_GPCCS)); | ||
784 | } | ||
785 | if (err) { | ||
786 | nvgpu_err(g, "Unable to recover GR falcon"); | ||
787 | return err; | ||
788 | } | ||
789 | |||
790 | } else { | ||
791 | /* cold boot or rg exit */ | ||
792 | __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, true); | ||
793 | if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { | ||
794 | gr_gm20b_load_gpccs_with_bootloader(g); | ||
795 | } else { | ||
796 | /* bind WPR VA inst block */ | ||
797 | gr_gk20a_load_falcon_bind_instblk(g); | ||
798 | if (g->ops.pmu.is_lazy_bootstrap(LSF_FALCON_ID_FECS)) | ||
799 | falcon_id_mask |= (1 << LSF_FALCON_ID_FECS); | ||
800 | if (g->ops.pmu.is_lazy_bootstrap(LSF_FALCON_ID_GPCCS)) | ||
801 | falcon_id_mask |= (1 << LSF_FALCON_ID_GPCCS); | ||
802 | |||
803 | err = g->ops.pmu.load_lsfalcon_ucode(g, falcon_id_mask); | ||
804 | |||
805 | if (err) { | ||
806 | nvgpu_err(g, "Unable to boot GPCCS"); | ||
807 | return err; | ||
808 | } | ||
809 | } | ||
810 | } | ||
811 | |||
812 | /*start gpccs */ | ||
813 | if (nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { | ||
814 | gk20a_writel(g, reg_offset + | ||
815 | gr_fecs_cpuctl_alias_r(), | ||
816 | gr_gpccs_cpuctl_startcpu_f(1)); | ||
817 | } else { | ||
818 | gk20a_writel(g, gr_gpccs_dmactl_r(), | ||
819 | gr_gpccs_dmactl_require_ctx_f(0)); | ||
820 | gk20a_writel(g, gr_gpccs_cpuctl_r(), | ||
821 | gr_gpccs_cpuctl_startcpu_f(1)); | ||
822 | } | ||
823 | /* start fecs */ | ||
824 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0); | ||
825 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(1), 0x1); | ||
826 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6), 0xffffffff); | ||
827 | gk20a_writel(g, gr_fecs_cpuctl_alias_r(), | ||
828 | gr_fecs_cpuctl_startcpu_f(1)); | ||
829 | gk20a_dbg_fn("done"); | ||
830 | |||
831 | return 0; | ||
832 | } | ||
833 | #else | ||
834 | |||
/*
 * Stub used when secure boot (CONFIG_TEGRA_ACR) is compiled out: the
 * LS-falcon ucode cannot be loaded, so report "operation not permitted".
 */
int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
{
	return -EPERM;
}
839 | |||
840 | #endif | ||
841 | |||
842 | void gr_gm20b_detect_sm_arch(struct gk20a *g) | ||
843 | { | ||
844 | u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); | ||
845 | |||
846 | g->params.sm_arch_spa_version = | ||
847 | gr_gpc0_tpc0_sm_arch_spa_version_v(v); | ||
848 | g->params.sm_arch_sm_version = | ||
849 | gr_gpc0_tpc0_sm_arch_sm_version_v(v); | ||
850 | g->params.sm_arch_warp_count = | ||
851 | gr_gpc0_tpc0_sm_arch_warp_count_v(v); | ||
852 | } | ||
853 | |||
854 | u32 gr_gm20b_pagepool_default_size(struct gk20a *g) | ||
855 | { | ||
856 | return gr_scc_pagepool_total_pages_hwmax_value_v(); | ||
857 | } | ||
858 | |||
859 | int gr_gm20b_alloc_gr_ctx(struct gk20a *g, | ||
860 | struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, | ||
861 | u32 class, | ||
862 | u32 flags) | ||
863 | { | ||
864 | int err; | ||
865 | |||
866 | gk20a_dbg_fn(""); | ||
867 | |||
868 | err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags); | ||
869 | if (err) | ||
870 | return err; | ||
871 | |||
872 | if (class == MAXWELL_COMPUTE_B) | ||
873 | (*gr_ctx)->compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CTA; | ||
874 | |||
875 | gk20a_dbg_fn("done"); | ||
876 | |||
877 | return 0; | ||
878 | } | ||
879 | |||
880 | void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, | ||
881 | struct channel_ctx_gk20a *ch_ctx, | ||
882 | struct nvgpu_mem *mem) | ||
883 | { | ||
884 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | ||
885 | u32 cta_preempt_option = | ||
886 | ctxsw_prog_main_image_preemption_options_control_cta_enabled_f(); | ||
887 | |||
888 | gk20a_dbg_fn(""); | ||
889 | |||
890 | if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { | ||
891 | gk20a_dbg_info("CTA: %x", cta_preempt_option); | ||
892 | nvgpu_mem_wr(g, mem, | ||
893 | ctxsw_prog_main_image_preemption_options_o(), | ||
894 | cta_preempt_option); | ||
895 | } | ||
896 | |||
897 | gk20a_dbg_fn("done"); | ||
898 | } | ||
899 | |||
/*
 * Dump a fixed set of GR unit status/activity/exception registers to the
 * given debug output stream, for post-mortem diagnostics.  Purely reads
 * registers; no GR state is modified.  Always returns 0.
 */
int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
			struct gk20a_debug_output *o)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gr_engine_id;

	gr_engine_id = gk20a_fifo_get_gr_engine_id(g);

	/* Top-level GR status and FIFO state. */
	gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n",
		gk20a_readl(g, gr_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n",
		gk20a_readl(g, gr_status_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_STATUS2: 0x%x\n",
		gk20a_readl(g, gr_status_2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n",
		gk20a_readl(g, gr_engine_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n",
		gk20a_readl(g, gr_gpfifo_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n",
		gk20a_readl(g, gr_gpfifo_ctl_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n",
		gk20a_readl(g, gr_fecs_host_int_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION  : 0x%x\n",
		gk20a_readl(g, gr_exception_r()));
	gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR  : 0x%x\n",
		gk20a_readl(g, gr_fecs_intr_r()));
	gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n",
		gk20a_readl(g, fifo_engine_status_r(gr_engine_id)));
	/* Per-unit activity registers (FE/MEMFMT/SKED/GPC/TPC/BE). */
	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n",
		gk20a_readl(g, gr_activity_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY2: 0x%x\n",
		gk20a_readl(g, gr_activity_2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n",
		gk20a_readl(g, gr_activity_4_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n",
		gk20a_readl(g, gr_pri_sked_activity_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
	/* TPC1 registers exist only on 2-TPC configs of GPC0. */
	if (gr->gpc_tpc_count[0] == 2)
		gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
			gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n",
		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n",
		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n",
		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
	if (gr->gpc_tpc_count[0] == 2)
		gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
			gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_be0_becs_be_activity0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_be1_becs_be_activity0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_bes_becs_be_activity0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n",
		gk20a_readl(g, gr_pri_ds_mpipe_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_ON_STATUS: 0x%x\n",
		gk20a_readl(g, gr_pri_fe_go_idle_on_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n",
		gk20a_readl(g, gr_fe_go_idle_timeout_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_CHECK : 0x%x\n",
		gk20a_readl(g, gr_pri_fe_go_idle_check_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n",
		gk20a_readl(g, gr_pri_fe_go_idle_info_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r()));
	/* Floorsweeping / work-distribution configuration. */
	gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n",
		gk20a_readl(g, gr_cwd_fs_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS: 0x%x\n",
		gk20a_readl(g, gr_fe_tpc_fs_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID(0): 0x%x\n",
		gk20a_readl(g, gr_cwd_gpc_tpc_id_r(0)));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n",
		gk20a_readl(g, gr_cwd_sm_id_r(0)));
	/* FECS/GPCCS context-switch state. */
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n",
		gk20a_readl(g, gr_fecs_ctxsw_status_fe_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n",
		gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n",
		gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n",
		gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n",
		gk20a_readl(g, gr_fecs_ctxsw_idlestate_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n",
		gk20a_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n",
		gk20a_readl(g, gr_fecs_current_ctx_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n",
		gk20a_readl(g, gr_fecs_new_ctx_r()));
	/* ROP status and exception state. */
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n",
		gk20a_readl(g, gr_pri_be0_crop_status1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n",
		gk20a_readl(g, gr_pri_bes_crop_status1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS : 0x%x\n",
		gk20a_readl(g, gr_pri_be0_zrop_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS2 : 0x%x\n",
		gk20a_readl(g, gr_pri_be0_zrop_status2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS : 0x%x\n",
		gk20a_readl(g, gr_pri_bes_zrop_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n",
		gk20a_readl(g, gr_pri_bes_zrop_status2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION: 0x%x\n",
		gk20a_readl(g, gr_pri_be0_becs_be_exception_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN: 0x%x\n",
		gk20a_readl(g, gr_pri_be0_becs_be_exception_en_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));

	return 0;
}
1037 | |||
/*
 * Enable or disable PC sampling in the channel's context-switch image.
 *
 * Performs a read-modify-write of the PM word in the gr context image,
 * updating only the pc_sampling field.  Rejects channels without a gr
 * context and VPR channels (their context memory is not CPU-accessible).
 *
 * Returns 0 on success, -EINVAL for an unusable channel, -ENOMEM when the
 * context memory cannot be mapped for CPU access.
 */
int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
				       bool enable)
{
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	struct nvgpu_mem *mem;
	u32 v;

	gk20a_dbg_fn("");

	if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr)
		return -EINVAL;

	mem = &ch_ctx->gr_ctx->mem;

	/* Map the ctx image for CPU access around the read-modify-write. */
	if (nvgpu_mem_begin(c->g, mem))
		return -ENOMEM;

	v = nvgpu_mem_rd(c->g, mem, ctxsw_prog_main_image_pm_o());
	v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
	v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
	nvgpu_mem_wr(c->g, mem, ctxsw_prog_main_image_pm_o(), v);

	nvgpu_mem_end(c->g, mem);

	gk20a_dbg_fn("done");

	return 0;
}
1066 | |||
1067 | u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g) | ||
1068 | { | ||
1069 | u32 fbp_en_mask, opt_fbio; | ||
1070 | u32 tmp, max_fbps_count; | ||
1071 | |||
1072 | tmp = gk20a_readl(g, top_num_fbps_r()); | ||
1073 | max_fbps_count = top_num_fbps_value_v(tmp); | ||
1074 | |||
1075 | opt_fbio = gk20a_readl(g, fuse_status_opt_fbio_r()); | ||
1076 | fbp_en_mask = | ||
1077 | ((1 << max_fbps_count) - 1) ^ | ||
1078 | fuse_status_opt_fbio_data_v(opt_fbio); | ||
1079 | return fbp_en_mask; | ||
1080 | } | ||
1081 | |||
1082 | u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g) | ||
1083 | { | ||
1084 | u32 ltc_per_fbp, reg; | ||
1085 | reg = gk20a_readl(g, top_ltc_per_fbp_r()); | ||
1086 | ltc_per_fbp = top_ltc_per_fbp_value_v(reg); | ||
1087 | return ltc_per_fbp; | ||
1088 | } | ||
1089 | |||
1090 | u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g) | ||
1091 | { | ||
1092 | u32 lts_per_ltc, reg; | ||
1093 | reg = gk20a_readl(g, top_slices_per_ltc_r()); | ||
1094 | lts_per_ltc = top_slices_per_ltc_value_v(reg); | ||
1095 | return lts_per_ltc; | ||
1096 | } | ||
1097 | |||
1098 | u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g) | ||
1099 | { | ||
1100 | struct gr_gk20a *gr = &g->gr; | ||
1101 | u32 i, tmp, max_fbps_count, max_ltc_per_fbp; | ||
1102 | u32 rop_l2_all_en; | ||
1103 | |||
1104 | tmp = gk20a_readl(g, top_num_fbps_r()); | ||
1105 | max_fbps_count = top_num_fbps_value_v(tmp); | ||
1106 | max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp(g); | ||
1107 | rop_l2_all_en = (1 << max_ltc_per_fbp) - 1; | ||
1108 | |||
1109 | /* mask of Rop_L2 for each FBP */ | ||
1110 | for (i = 0; i < max_fbps_count; i++) { | ||
1111 | tmp = gk20a_readl(g, fuse_status_opt_rop_l2_fbp_r(i)); | ||
1112 | gr->fbp_rop_l2_en_mask[i] = rop_l2_all_en ^ tmp; | ||
1113 | } | ||
1114 | |||
1115 | return gr->fbp_rop_l2_en_mask; | ||
1116 | } | ||
1117 | |||
1118 | u32 gr_gm20b_get_max_fbps_count(struct gk20a *g) | ||
1119 | { | ||
1120 | u32 tmp, max_fbps_count; | ||
1121 | tmp = gk20a_readl(g, top_num_fbps_r()); | ||
1122 | max_fbps_count = top_num_fbps_value_v(tmp); | ||
1123 | return max_fbps_count; | ||
1124 | } | ||
1125 | |||
/*
 * Advertise cycle-stats support when the feature is compiled in: enable
 * both the plain and snapshot cycle-stats capabilities and report an
 * effectively unlimited cycle-stats buffer size.  No-op otherwise.
 */
void gr_gm20b_init_cyclestats(struct gk20a *g)
{
#if defined(CONFIG_GK20A_CYCLE_STATS)
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS, true);
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT, true);
	g->gr.max_css_buffer_size = 0xffffffff;
#else
	(void)g;
#endif
}
1136 | |||
1137 | void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem) | ||
1138 | { | ||
1139 | u32 cde_v; | ||
1140 | |||
1141 | cde_v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_ctl_o()); | ||
1142 | cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f(); | ||
1143 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v); | ||
1144 | } | ||
1145 | |||
/*
 * Collect per-SM warp breakpoint state (valid / bpt-paused / bpt-trapped
 * warp masks) into w_state, one entry per SM.  Each mask is assembled
 * from two 32-bit register reads (hi word at +4, then lo word).  When a
 * TPC has more than 64 warps, the second mask registers are read into
 * index [1] as well.
 */
void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state)
{
	/* Check if we have at least one valid warp */
	/* get paused state on maxwell */
	struct gr_gk20a *gr = &g->gr;
	u32 gpc, tpc, sm_id;
	u32 tpc_offset, gpc_offset, reg_offset;
	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);

	/* for maxwell & kepler */
	u32 numSmPerTpc = 1;
	u32 numWarpPerTpc = g->params.sm_arch_warp_count * numSmPerTpc;

	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
		/* Translate the SM index to its GPC/TPC register offset. */
		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;

		tpc_offset = tpc_in_gpc_stride * tpc;
		gpc_offset = gpc_stride * gpc;
		reg_offset = tpc_offset + gpc_offset;

		/* 64 bit read */
		warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32;
		warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset);

		/* 64 bit read */
		warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32;
		warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset);

		/* 64 bit read */
		warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32;
		warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset);

		w_state[sm_id].valid_warps[0] = warps_valid;
		w_state[sm_id].trapped_warps[0] = warps_trapped;
		w_state[sm_id].paused_warps[0] = warps_paused;


		if (numWarpPerTpc > 64) {
			/* second 64-warp group lives in the _2 registers */
			/* 64 bit read */
			warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset + 4) << 32;
			warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset);

			/* 64 bit read */
			warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset + 4) << 32;
			warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset);

			/* 64 bit read */
			warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset + 4) << 32;
			warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset);

			w_state[sm_id].valid_warps[1] = warps_valid;
			w_state[sm_id].trapped_warps[1] = warps_trapped;
			w_state[sm_id].paused_warps[1] = warps_paused;
		}
	}


	/* Only for debug purpose */
	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
		gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n",
			sm_id, w_state[sm_id].valid_warps[0]);
		gk20a_dbg_fn("w_state[%d].valid_warps[1]: %llx\n",
			sm_id, w_state[sm_id].valid_warps[1]);

		gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n",
			sm_id, w_state[sm_id].trapped_warps[0]);
		gk20a_dbg_fn("w_state[%d].trapped_warps[1]: %llx\n",
			sm_id, w_state[sm_id].trapped_warps[1]);

		gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n",
			sm_id, w_state[sm_id].paused_warps[0]);
		gk20a_dbg_fn("w_state[%d].paused_warps[1]: %llx\n",
			sm_id, w_state[sm_id].paused_warps[1]);
	}
}
1224 | |||
/*
 * Return the whitelist of GR register addresses that userspace debuggers
 * may access directly on GM20B.  The table is static and must stay sorted
 * ascending, since consumers binary-search it.
 */
void gr_gm20b_get_access_map(struct gk20a *g,
			    u32 **whitelist, int *num_entries)
{
	static u32 wl_addr_gm20b[] = {
		/* this list must be sorted (low to high) */
		0x404468, /* gr_pri_mme_max_instructions       */
		0x418300, /* gr_pri_gpcs_rasterarb_line_class  */
		0x418800, /* gr_pri_gpcs_setup_debug           */
		0x418e00, /* gr_pri_gpcs_swdx_config           */
		0x418e40, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
		0x418e44, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
		0x418e48, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
		0x418e4c, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
		0x418e50, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
		0x418e58, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e5c, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e60, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e64, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e68, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e6c, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e70, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e74, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e78, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e7c, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e80, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e84, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e88, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e8c, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e90, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e94, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x419864, /* gr_pri_gpcs_tpcs_pe_l2_evict_policy */
		0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg      */
		0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg     */
		0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */
		0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl     */
	};

	*whitelist = wl_addr_gm20b;
	*num_entries = ARRAY_SIZE(wl_addr_gm20b);
}
1265 | |||
/*
 * Snapshot the SM hardware-warning (HWW) error state of one GPC/TPC into
 * gr->sm_error_states, indexed by the SM id read from the TPC's SM config
 * register.  Captures the global/warp ESRs, the faulting warp PC, and
 * both report masks, under dbg_sessions_lock.  Always returns 0.
 */
int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
{
	int sm_id;
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
					GPU_LIT_TPC_IN_GPC_STRIDE);
	/* register offset of this GPC/TPC's unicast range */
	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;

	nvgpu_mutex_acquire(&g->dbg_sessions_lock);

	sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
			gr_gpc0_tpc0_sm_cfg_r() + offset));

	gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
			gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
	gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
			gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
	gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g,
			gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset);
	gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
			gr_gpc0_tpc0_sm_hww_global_esr_report_mask_r() + offset);
	gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
			gr_gpc0_tpc0_sm_hww_warp_esr_report_mask_r() + offset);

	nvgpu_mutex_release(&g->dbg_sessions_lock);

	return 0;
}
1295 | |||
/*
 * Overwrite the driver's recorded SM error state for sm_id with the
 * caller-provided values and push them to hardware or to the channel's
 * context image.
 *
 * With ctxsw stopped: if the channel's context is resident on the GPU the
 * ESR values and report masks are written straight to the TPC registers;
 * otherwise only the two report masks are patched into the channel's
 * ctx patch buffer, to be applied at the next context load.
 *
 * Returns 0 on success or an error from disabling/re-enabling ctxsw or
 * from the patch-write begin.
 */
int gm20b_gr_update_sm_error_state(struct gk20a *g,
		struct channel_gk20a *ch, u32 sm_id,
		struct nvgpu_gr_sm_error_state *sm_error_state)
{
	u32 gpc, tpc, offset;
	struct gr_gk20a *gr = &g->gr;
	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
					GPU_LIT_TPC_IN_GPC_STRIDE);
	int err = 0;

	nvgpu_mutex_acquire(&g->dbg_sessions_lock);

	/* Update the driver-side record first. */
	gr->sm_error_states[sm_id].hww_global_esr =
			sm_error_state->hww_global_esr;
	gr->sm_error_states[sm_id].hww_warp_esr =
			sm_error_state->hww_warp_esr;
	gr->sm_error_states[sm_id].hww_warp_esr_pc =
			sm_error_state->hww_warp_esr_pc;
	gr->sm_error_states[sm_id].hww_global_esr_report_mask =
			sm_error_state->hww_global_esr_report_mask;
	gr->sm_error_states[sm_id].hww_warp_esr_report_mask =
			sm_error_state->hww_warp_esr_report_mask;

	err = gr_gk20a_disable_ctxsw(g);
	if (err) {
		nvgpu_err(g, "unable to stop gr ctxsw");
		goto fail;
	}

	gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
	tpc = g->gr.sm_to_cluster[sm_id].tpc_index;

	offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;

	if (gk20a_is_channel_ctx_resident(ch)) {
		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
				gr->sm_error_states[sm_id].hww_global_esr);
		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
				gr->sm_error_states[sm_id].hww_warp_esr);
		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset,
				gr->sm_error_states[sm_id].hww_warp_esr_pc);
		/*
		 * NOTE(review): the report masks are written via the
		 * gpcs_tpcs broadcast base plus a unicast GPC/TPC offset;
		 * presumably intentional so the write targets the unicast
		 * alias — confirm against the GM20B PRI address map.
		 */
		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
				gr->sm_error_states[sm_id].hww_global_esr_report_mask);
		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
				gr->sm_error_states[sm_id].hww_warp_esr_report_mask);
	} else {
		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
		if (err)
			goto enable_ctxsw;

		gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
				gr->sm_error_states[sm_id].hww_global_esr_report_mask,
				true);
		gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
				gr->sm_error_states[sm_id].hww_warp_esr_report_mask,
				true);

		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
	}

enable_ctxsw:
	err = gr_gk20a_enable_ctxsw(g);

fail:
	nvgpu_mutex_release(&g->dbg_sessions_lock);
	return err;
}
1367 | |||
/*
 * Clear both the driver-side record and (when the channel's context is
 * resident) the hardware SM HWW error state for sm_id.  The global ESR is
 * cleared by writing back the bits currently set in it, the warp ESR by
 * writing zero.  Runs with ctxsw stopped under dbg_sessions_lock.
 *
 * Returns 0 on success or an error from disabling/re-enabling ctxsw.
 */
int gm20b_gr_clear_sm_error_state(struct gk20a *g,
		struct channel_gk20a *ch, u32 sm_id)
{
	u32 gpc, tpc, offset;
	u32 val;
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
					GPU_LIT_TPC_IN_GPC_STRIDE);
	int err = 0;

	nvgpu_mutex_acquire(&g->dbg_sessions_lock);

	memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states));

	err = gr_gk20a_disable_ctxsw(g);
	if (err) {
		nvgpu_err(g, "unable to stop gr ctxsw");
		goto fail;
	}

	if (gk20a_is_channel_ctx_resident(ch)) {
		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;

		offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;

		/* write back the currently-set bits to clear them */
		val = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
				val);
		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
				0);
	}

	err = gr_gk20a_enable_ctxsw(g);

fail:
	nvgpu_mutex_release(&g->dbg_sessions_lock);
	return err;
}
1408 | |||
1409 | int gr_gm20b_get_preemption_mode_flags(struct gk20a *g, | ||
1410 | struct nvgpu_preemption_modes_rec *preemption_modes_rec) | ||
1411 | { | ||
1412 | preemption_modes_rec->graphics_preemption_mode_flags = | ||
1413 | NVGPU_PREEMPTION_MODE_GRAPHICS_WFI; | ||
1414 | preemption_modes_rec->compute_preemption_mode_flags = ( | ||
1415 | NVGPU_PREEMPTION_MODE_COMPUTE_WFI | | ||
1416 | NVGPU_PREEMPTION_MODE_COMPUTE_CTA); | ||
1417 | |||
1418 | preemption_modes_rec->default_graphics_preempt_mode = | ||
1419 | NVGPU_PREEMPTION_MODE_GRAPHICS_WFI; | ||
1420 | preemption_modes_rec->default_compute_preempt_mode = | ||
1421 | NVGPU_PREEMPTION_MODE_COMPUTE_CTA; | ||
1422 | |||
1423 | return 0; | ||
1424 | } | ||
1425 | |||
1426 | bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr) | ||
1427 | { | ||
1428 | u32 ltc_shared_base = ltc_ltcs_ltss_v(); | ||
1429 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1430 | |||
1431 | return (addr >= ltc_shared_base) && | ||
1432 | (addr < (ltc_shared_base + lts_stride)); | ||
1433 | } | ||
1434 | |||
1435 | bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr) | ||
1436 | { | ||
1437 | u32 lts_shared_base = ltc_ltc0_ltss_v(); | ||
1438 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1439 | u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1; | ||
1440 | u32 base_offset = lts_shared_base & addr_mask; | ||
1441 | u32 end_offset = base_offset + lts_stride; | ||
1442 | |||
1443 | return (!gr_gm20b_is_ltcs_ltss_addr(g, addr)) && | ||
1444 | ((addr & addr_mask) >= base_offset) && | ||
1445 | ((addr & addr_mask) < end_offset); | ||
1446 | } | ||
1447 | |||
1448 | static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num, | ||
1449 | u32 *priv_addr_table, | ||
1450 | u32 *priv_addr_table_index) | ||
1451 | { | ||
1452 | u32 num_ltc_slices = g->ops.gr.get_max_lts_per_ltc(g); | ||
1453 | u32 index = *priv_addr_table_index; | ||
1454 | u32 lts_num; | ||
1455 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
1456 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1457 | |||
1458 | for (lts_num = 0; lts_num < num_ltc_slices; lts_num++) | ||
1459 | priv_addr_table[index++] = ltc_ltc0_lts0_v() + | ||
1460 | ltc_num * ltc_stride + | ||
1461 | lts_num * lts_stride + | ||
1462 | (addr & (lts_stride - 1)); | ||
1463 | |||
1464 | *priv_addr_table_index = index; | ||
1465 | } | ||
1466 | |||
1467 | void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr, | ||
1468 | u32 *priv_addr_table, | ||
1469 | u32 *priv_addr_table_index) | ||
1470 | { | ||
1471 | u32 num_ltc = g->ltc_count; | ||
1472 | u32 i, start, ltc_num = 0; | ||
1473 | u32 pltcg_base = ltc_pltcg_base_v(); | ||
1474 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
1475 | |||
1476 | for (i = 0; i < num_ltc; i++) { | ||
1477 | start = pltcg_base + i * ltc_stride; | ||
1478 | if ((addr >= start) && (addr < (start + ltc_stride))) { | ||
1479 | ltc_num = i; | ||
1480 | break; | ||
1481 | } | ||
1482 | } | ||
1483 | gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table, | ||
1484 | priv_addr_table_index); | ||
1485 | } | ||
1486 | |||
1487 | void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, | ||
1488 | u32 *priv_addr_table, | ||
1489 | u32 *priv_addr_table_index) | ||
1490 | { | ||
1491 | u32 num_ltc = g->ltc_count; | ||
1492 | u32 ltc_num; | ||
1493 | |||
1494 | for (ltc_num = 0; ltc_num < num_ltc; ltc_num++) | ||
1495 | gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, | ||
1496 | priv_addr_table, priv_addr_table_index); | ||
1497 | } | ||
1498 | |||
1499 | void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, | ||
1500 | u32 global_esr) | ||
1501 | { | ||
1502 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); | ||
1503 | |||
1504 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, | ||
1505 | global_esr); | ||
1506 | |||
1507 | /* clear the warp hww */ | ||
1508 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, 0); | ||
1509 | } | ||
1510 | |||
1511 | /* | ||
1512 | * Disable both surface and LG coalesce. | ||
1513 | */ | ||
1514 | void gm20a_gr_disable_rd_coalesce(struct gk20a *g) | ||
1515 | { | ||
1516 | u32 dbg2_reg; | ||
1517 | |||
1518 | dbg2_reg = gk20a_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r()); | ||
1519 | dbg2_reg = set_field(dbg2_reg, | ||
1520 | gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_m(), | ||
1521 | gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_f(0)); | ||
1522 | dbg2_reg = set_field(dbg2_reg, | ||
1523 | gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_m(), | ||
1524 | gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_f(0)); | ||
1525 | |||
1526 | gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), dbg2_reg); | ||
1527 | } | ||