Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/gr_gp10b.c')
-rw-r--r--   drivers/gpu/nvgpu/gp10b/gr_gp10b.c   2357
1 file changed, 2357 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
new file mode 100644
index 00000000..08988ac8
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -0,0 +1,2357 @@
1/*
2 * GP10B GPU GR
3 *
4 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <dt-bindings/soc/gm20b-fuse.h>
26#include <dt-bindings/soc/gp10b-fuse.h>
27#include <uapi/linux/nvgpu.h>
28
29#include <nvgpu/timers.h>
30#include <nvgpu/kmem.h>
31#include <nvgpu/gmmu.h>
32#include <nvgpu/dma.h>
33#include <nvgpu/bug.h>
34#include <nvgpu/debug.h>
35#include <nvgpu/fuse.h>
36#include <nvgpu/enabled.h>
37
38#include "gk20a/gk20a.h"
39#include "gk20a/gr_gk20a.h"
40#include "gk20a/dbg_gpu_gk20a.h"
41#include "gk20a/regops_gk20a.h"
42#include "common/linux/os_linux.h"
43
44#include "gm20b/gr_gm20b.h"
45#include "gp10b/gr_gp10b.h"
46
47#include <nvgpu/hw/gp10b/hw_gr_gp10b.h>
48#include <nvgpu/hw/gp10b/hw_fifo_gp10b.h>
49#include <nvgpu/hw/gp10b/hw_ctxsw_prog_gp10b.h>
50#include <nvgpu/hw/gp10b/hw_mc_gp10b.h>
51#include <nvgpu/hw/gp10b/hw_fuse_gp10b.h>
52
53bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
54{
55 bool valid = false;
56
57 switch (class_num) {
58 case PASCAL_COMPUTE_A:
59 case PASCAL_A:
60 case PASCAL_DMA_COPY_A:
61 valid = true;
62 break;
63
64 case MAXWELL_COMPUTE_B:
65 case MAXWELL_B:
66 case FERMI_TWOD_A:
67 case KEPLER_DMA_COPY_A:
68 case MAXWELL_DMA_COPY_A:
69 valid = true;
70 break;
71
72 default:
73 break;
74 }
75 gk20a_dbg_info("class=0x%x valid=%d", class_num, valid);
76 return valid;
77}
78
79bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
80{
81 if (class_num == PASCAL_A || class_num == MAXWELL_B)
82 return true;
83 else
84 return false;
85}
86
87bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num)
88{
89 if (class_num == PASCAL_COMPUTE_A || class_num == MAXWELL_COMPUTE_B)
90 return true;
91 else
92 return false;
93}
94
95
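/*
 * WAR for SM LRF ECC counter overcounting: compute how many spurious
 * counts the HW added for this error type (see the per-case comments
 * below) and subtract them from the reported delta, clamping at zero.
 */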
96static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err,
97 u32 sed_status,
98 u32 ded_status,
99 u32 *count_to_adjust,
100 u32 opposite_count)
101{
102 u32 over_count = 0;
103
104 sed_status >>= gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_b();
105 ded_status >>= gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_b();
106
107	/* One overcount for each partition on which an SBE occurred but not a
108	   DBE (or vice versa). */
109 if (single_err) {
110 over_count =
111 hweight32(sed_status & ~ded_status);
112 } else {
113 over_count =
114 hweight32(ded_status & ~sed_status);
115 }
116
117	/* If both an SBE and a DBE occur on the same partition, then we have an
118	   overcount for the subpartition if the opposite error counts are
119	   zero. */
120 if ((sed_status & ded_status) && (opposite_count == 0)) {
121 over_count +=
122 hweight32(sed_status & ded_status);
123 }
124
125 if (*count_to_adjust > over_count)
126 *count_to_adjust -= over_count;
127 else
128 *count_to_adjust = 0;
129}
130
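/*
 * Run the common GK20A SM exception handling, then harvest the per-TPC
 * LRF and SHM ECC statistics: read the status and count registers, apply
 * the LRF overcount WAR, accumulate into g->ecc.gr.t18x and clear the HW
 * counters and status.
 */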
131int gr_gp10b_handle_sm_exception(struct gk20a *g,
132 u32 gpc, u32 tpc, u32 sm,
133 bool *post_event, struct channel_gk20a *fault_ch,
134 u32 *hww_global_esr)
135{
136 int ret = 0;
137 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
138 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
139 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
140 u32 lrf_ecc_status, lrf_ecc_sed_status, lrf_ecc_ded_status;
141 u32 lrf_single_count_delta, lrf_double_count_delta;
142 u32 shm_ecc_status;
143
144 ret = gr_gk20a_handle_sm_exception(g,
145 gpc, tpc, sm, post_event, fault_ch, hww_global_esr);
146
147 /* Check for LRF ECC errors. */
148 lrf_ecc_status = gk20a_readl(g,
149 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset);
150 lrf_ecc_sed_status = lrf_ecc_status &
151 (gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f() |
152 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f() |
153 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f() |
154 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f());
155 lrf_ecc_ded_status = lrf_ecc_status &
156 (gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f() |
157 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f() |
158 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f() |
159 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f());
160 lrf_single_count_delta =
161 gk20a_readl(g,
162 gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() +
163 offset);
164 lrf_double_count_delta =
165 gk20a_readl(g,
166 gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() +
167 offset);
168 gk20a_writel(g,
169 gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset,
170 0);
171 gk20a_writel(g,
172 gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset,
173 0);
174 if (lrf_ecc_sed_status) {
175 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
176 "Single bit error detected in SM LRF!");
177
178 gr_gp10b_sm_lrf_ecc_overcount_war(1,
179 lrf_ecc_sed_status,
180 lrf_ecc_ded_status,
181 &lrf_single_count_delta,
182 lrf_double_count_delta);
183 g->ecc.gr.t18x.sm_lrf_single_err_count.counters[tpc] +=
184 lrf_single_count_delta;
185 }
186 if (lrf_ecc_ded_status) {
187 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
188 "Double bit error detected in SM LRF!");
189
190 gr_gp10b_sm_lrf_ecc_overcount_war(0,
191 lrf_ecc_sed_status,
192 lrf_ecc_ded_status,
193 &lrf_double_count_delta,
194 lrf_single_count_delta);
195 g->ecc.gr.t18x.sm_lrf_double_err_count.counters[tpc] +=
196 lrf_double_count_delta;
197 }
198 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
199 lrf_ecc_status);
200
201 /* Check for SHM ECC errors. */
202 shm_ecc_status = gk20a_readl(g,
203 gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset);
204 if ((shm_ecc_status &
205 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f()) ||
206 (shm_ecc_status &
207 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm1_pending_f()) ||
208 (shm_ecc_status &
209 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) ||
210 (shm_ecc_status &
211 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f()) ) {
212 u32 ecc_stats_reg_val;
213
214 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
215 "Single bit error detected in SM SHM!");
216
217 ecc_stats_reg_val =
218 gk20a_readl(g,
219 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
220 g->ecc.gr.t18x.sm_shm_sec_count.counters[tpc] +=
221 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val);
222 g->ecc.gr.t18x.sm_shm_sed_count.counters[tpc] +=
223 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val);
224 ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() |
225 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m());
226 gk20a_writel(g,
227 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
228 ecc_stats_reg_val);
229 }
230 if ( (shm_ecc_status &
231 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) ||
232 (shm_ecc_status &
233 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f()) ) {
234 u32 ecc_stats_reg_val;
235
236 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
237 "Double bit error detected in SM SHM!");
238
239 ecc_stats_reg_val =
240 gk20a_readl(g,
241 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
242 g->ecc.gr.t18x.sm_shm_ded_count.counters[tpc] +=
243 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val);
244 ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m());
245 gk20a_writel(g,
246 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
247 ecc_stats_reg_val);
248 }
249 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset,
250 shm_ecc_status);
251
252
253 return ret;
254}
255
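/*
 * On a TEX HWW ESR with SEC or DED pending, select each TEX pipe in turn
 * through the routing register, accumulate the total/unique ECC counts
 * into the per-TPC statistics, clear the HW counters, restore the default
 * routing and finally reset the ESR.
 */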
256int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
257 bool *post_event)
258{
259 int ret = 0;
260 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
261 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
262 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
263 u32 esr;
264 u32 ecc_stats_reg_val;
265
266 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
267
268 esr = gk20a_readl(g,
269 gr_gpc0_tpc0_tex_m_hww_esr_r() + offset);
270 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "0x%08x", esr);
271
272 if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f()) {
273 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
274 "Single bit error detected in TEX!");
275
276 /* Pipe 0 counters */
277 gk20a_writel(g,
278 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
279 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f());
280
281 ecc_stats_reg_val = gk20a_readl(g,
282 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
283 g->ecc.gr.t18x.tex_total_sec_pipe0_count.counters[tpc] +=
284 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
285 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
286 gk20a_writel(g,
287 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
288 ecc_stats_reg_val);
289
290 ecc_stats_reg_val = gk20a_readl(g,
291 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
292 g->ecc.gr.t18x.tex_unique_sec_pipe0_count.counters[tpc] +=
293 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
294 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
295 gk20a_writel(g,
296 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
297 ecc_stats_reg_val);
298
299
300 /* Pipe 1 counters */
301 gk20a_writel(g,
302 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
303 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f());
304
305 ecc_stats_reg_val = gk20a_readl(g,
306 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
307 g->ecc.gr.t18x.tex_total_sec_pipe1_count.counters[tpc] +=
308 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
309 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
310 gk20a_writel(g,
311 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
312 ecc_stats_reg_val);
313
314 ecc_stats_reg_val = gk20a_readl(g,
315 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
316 g->ecc.gr.t18x.tex_unique_sec_pipe1_count.counters[tpc] +=
317 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
318 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
319 gk20a_writel(g,
320 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
321 ecc_stats_reg_val);
322
323
324 gk20a_writel(g,
325 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
326 gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f());
327 }
328 if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f()) {
329 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
330 "Double bit error detected in TEX!");
331
332 /* Pipe 0 counters */
333 gk20a_writel(g,
334 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
335 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f());
336
337 ecc_stats_reg_val = gk20a_readl(g,
338 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
339 g->ecc.gr.t18x.tex_total_ded_pipe0_count.counters[tpc] +=
340 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
341 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
342 gk20a_writel(g,
343 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
344 ecc_stats_reg_val);
345
346 ecc_stats_reg_val = gk20a_readl(g,
347 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
348 g->ecc.gr.t18x.tex_unique_ded_pipe0_count.counters[tpc] +=
349 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
350 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
351 gk20a_writel(g,
352 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
353 ecc_stats_reg_val);
354
355
356 /* Pipe 1 counters */
357 gk20a_writel(g,
358 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
359 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f());
360
361 ecc_stats_reg_val = gk20a_readl(g,
362 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
363 g->ecc.gr.t18x.tex_total_ded_pipe1_count.counters[tpc] +=
364 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
365 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
366 gk20a_writel(g,
367 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
368 ecc_stats_reg_val);
369
370 ecc_stats_reg_val = gk20a_readl(g,
371 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
372 g->ecc.gr.t18x.tex_unique_ded_pipe1_count.counters[tpc] +=
373 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
374 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
375 gk20a_writel(g,
376 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
377 ecc_stats_reg_val);
378
379
380 gk20a_writel(g,
381 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
382 gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f());
383 }
384
385 gk20a_writel(g,
386 gr_gpc0_tpc0_tex_m_hww_esr_r() + offset,
387 esr | gr_gpc0_tpc0_tex_m_hww_esr_reset_active_f());
388
389 return ret;
390}
391
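/*
 * Patch the per-PPC beta/alpha circular buffer sizes and offsets (plus
 * the SWDX steady-state beta CB size) into the channel's context. When
 * the context uses GfxP graphics preemption, the beta CB is grown from
 * the default size to the GfxP size.
 */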
392int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
393 struct channel_gk20a *c, bool patch)
394{
395 struct gr_gk20a *gr = &g->gr;
396 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
397 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
398 u32 attrib_offset_in_chunk = 0;
399 u32 alpha_offset_in_chunk = 0;
400 u32 pd_ab_max_output;
401 u32 gpc_index, ppc_index;
402 u32 temp, temp2;
403 u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate;
404 u32 attrib_size_in_chunk, cb_attrib_cache_size_init;
405 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
406 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
407 u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
408
409 gk20a_dbg_fn("");
410
411 if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
412 attrib_size_in_chunk = gr->attrib_cb_default_size +
413 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
414 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
415 cb_attrib_cache_size_init = gr->attrib_cb_default_size +
416 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
417 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
418 } else {
419 attrib_size_in_chunk = gr->attrib_cb_size;
420 cb_attrib_cache_size_init = gr->attrib_cb_default_size;
421 }
422
423 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(),
424 gr->attrib_cb_default_size, patch);
425 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_alpha_r(),
426 gr->alpha_cb_default_size, patch);
427
428 pd_ab_max_output = (gr->alpha_cb_default_size *
429 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
430 gr_pd_ab_dist_cfg1_max_output_granularity_v();
431
432 if (g->gr.pd_max_batches) {
433 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
434 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
435 gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches), patch);
436 } else {
437 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
438 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
439 gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
440 }
441
442 attrib_offset_in_chunk = alpha_offset_in_chunk +
443 gr->tpc_count * gr->alpha_cb_size;
444
445 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
446 temp = gpc_stride * gpc_index;
447 temp2 = num_pes_per_gpc * gpc_index;
448 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
449 ppc_index++) {
450 cbm_cfg_size_beta = cb_attrib_cache_size_init *
451 gr->pes_tpc_count[ppc_index][gpc_index];
452 cbm_cfg_size_alpha = gr->alpha_cb_default_size *
453 gr->pes_tpc_count[ppc_index][gpc_index];
454 cbm_cfg_size_steadystate = gr->attrib_cb_default_size *
455 gr->pes_tpc_count[ppc_index][gpc_index];
456
457 gr_gk20a_ctx_patch_write(g, ch_ctx,
458 gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
459 ppc_in_gpc_stride * ppc_index,
460 cbm_cfg_size_beta, patch);
461
462 gr_gk20a_ctx_patch_write(g, ch_ctx,
463 gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
464 ppc_in_gpc_stride * ppc_index,
465 attrib_offset_in_chunk, patch);
466
467 gr_gk20a_ctx_patch_write(g, ch_ctx,
468 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
469 ppc_in_gpc_stride * ppc_index,
470 cbm_cfg_size_steadystate,
471 patch);
472
473 attrib_offset_in_chunk += attrib_size_in_chunk *
474 gr->pes_tpc_count[ppc_index][gpc_index];
475
476 gr_gk20a_ctx_patch_write(g, ch_ctx,
477 gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
478 ppc_in_gpc_stride * ppc_index,
479 cbm_cfg_size_alpha, patch);
480
481 gr_gk20a_ctx_patch_write(g, ch_ctx,
482 gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
483 ppc_in_gpc_stride * ppc_index,
484 alpha_offset_in_chunk, patch);
485
486 alpha_offset_in_chunk += gr->alpha_cb_size *
487 gr->pes_tpc_count[ppc_index][gpc_index];
488
489 gr_gk20a_ctx_patch_write(g, ch_ctx,
490 gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
491 gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate),
492 patch);
493 }
494 }
495
496 return 0;
497}
498
499void gr_gp10b_commit_global_pagepool(struct gk20a *g,
500 struct channel_ctx_gk20a *ch_ctx,
501 u64 addr, u32 size, bool patch)
502{
503 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(),
504 gr_scc_pagepool_base_addr_39_8_f(addr), patch);
505
506 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(),
507 gr_scc_pagepool_total_pages_f(size) |
508 gr_scc_pagepool_valid_true_f(), patch);
509
510 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(),
511 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
512
513 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(),
514 gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
515}
516
517int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
518 struct zbc_entry *color_val, u32 index)
519{
520 u32 i;
521 u32 zbc_c;
522
523 /* update l2 table */
524 g->ops.ltc.set_zbc_color_entry(g, color_val, index);
525
526 /* update ds table */
527 gk20a_writel(g, gr_ds_zbc_color_r_r(),
528 gr_ds_zbc_color_r_val_f(color_val->color_ds[0]));
529 gk20a_writel(g, gr_ds_zbc_color_g_r(),
530 gr_ds_zbc_color_g_val_f(color_val->color_ds[1]));
531 gk20a_writel(g, gr_ds_zbc_color_b_r(),
532 gr_ds_zbc_color_b_val_f(color_val->color_ds[2]));
533 gk20a_writel(g, gr_ds_zbc_color_a_r(),
534 gr_ds_zbc_color_a_val_f(color_val->color_ds[3]));
535
536 gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
537 gr_ds_zbc_color_fmt_val_f(color_val->format));
538
539 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
540 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
541
542 /* trigger the write */
543 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
544 gr_ds_zbc_tbl_ld_select_c_f() |
545 gr_ds_zbc_tbl_ld_action_write_f() |
546 gr_ds_zbc_tbl_ld_trigger_active_f());
547
548 /* update local copy */
549 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
550 gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i];
551 gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i];
552 }
553 gr->zbc_col_tbl[index].format = color_val->format;
554 gr->zbc_col_tbl[index].ref_cnt++;
555
556 gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_r_r(index),
557 color_val->color_ds[0]);
558 gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_g_r(index),
559 color_val->color_ds[1]);
560 gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_b_r(index),
561 color_val->color_ds[2]);
562 gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_a_r(index),
563 color_val->color_ds[3]);
564 zbc_c = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3));
565 zbc_c &= ~(0x7f << ((index % 4) * 7));
566 zbc_c |= color_val->format << ((index % 4) * 7);
567 gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3), zbc_c);
568
569 return 0;
570}
571
572int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
573 struct zbc_entry *depth_val, u32 index)
574{
575 u32 zbc_z;
576
577 /* update l2 table */
578 g->ops.ltc.set_zbc_depth_entry(g, depth_val, index);
579
580 /* update ds table */
581 gk20a_writel(g, gr_ds_zbc_z_r(),
582 gr_ds_zbc_z_val_f(depth_val->depth));
583
584 gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
585 gr_ds_zbc_z_fmt_val_f(depth_val->format));
586
587 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
588 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
589
590 /* trigger the write */
591 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
592 gr_ds_zbc_tbl_ld_select_z_f() |
593 gr_ds_zbc_tbl_ld_action_write_f() |
594 gr_ds_zbc_tbl_ld_trigger_active_f());
595
596 /* update local copy */
597 gr->zbc_dep_tbl[index].depth = depth_val->depth;
598 gr->zbc_dep_tbl[index].format = depth_val->format;
599 gr->zbc_dep_tbl[index].ref_cnt++;
600
601 gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_r(index), depth_val->depth);
602 zbc_z = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index & ~3));
603 zbc_z &= ~(0x7f << (index % 4) * 7);
604 zbc_z |= depth_val->format << (index % 4) * 7;
605 gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index & ~3), zbc_z);
606
607 return 0;
608}
609
610u32 gr_gp10b_pagepool_default_size(struct gk20a *g)
611{
612 return gr_scc_pagepool_total_pages_hwmax_value_v();
613}
614
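/*
 * Clamp the attrib/alpha CB sizes so the per-PPC HW size fields cannot
 * overflow, then size the global buffer as (beta + alpha) CB size times
 * the CB granularity times the maximum TPC count, aligned to 128 bytes.
 */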
615int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
616{
617 struct gr_gk20a *gr = &g->gr;
618 int size;
619
620 gr->attrib_cb_size = gr->attrib_cb_default_size;
621 gr->alpha_cb_size = gr->alpha_cb_default_size;
622
623 gr->attrib_cb_size = min(gr->attrib_cb_size,
624 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~0) / g->gr.tpc_count);
625 gr->alpha_cb_size = min(gr->alpha_cb_size,
626 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~0) / g->gr.tpc_count);
627
628 size = gr->attrib_cb_size *
629 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
630 gr->max_tpc_count;
631
632 size += gr->alpha_cb_size *
633 gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
634 gr->max_tpc_count;
635
636 size = ALIGN(size, 128);
637
638 return size;
639}
640
641static void gr_gp10b_set_go_idle_timeout(struct gk20a *g, u32 data)
642{
643 gk20a_writel(g, gr_fe_go_idle_timeout_r(), data);
644}
645
646static void gr_gp10b_set_coalesce_buffer_size(struct gk20a *g, u32 data)
647{
648 u32 val;
649
650 gk20a_dbg_fn("");
651
652 val = gk20a_readl(g, gr_gpcs_tc_debug0_r());
653 val = set_field(val, gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(),
654 gr_gpcs_tc_debug0_limit_coalesce_buffer_size_f(data));
655 gk20a_writel(g, gr_gpcs_tc_debug0_r(), val);
656
657 gk20a_dbg_fn("done");
658}
659
660void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data)
661{
662 u32 val;
663
664 gk20a_dbg_fn("");
665
666 val = gk20a_readl(g, gr_bes_crop_debug3_r());
667 if ((data & 1)) {
668 val = set_field(val,
669 gr_bes_crop_debug3_blendopt_read_suppress_m(),
670 gr_bes_crop_debug3_blendopt_read_suppress_enabled_f());
671 val = set_field(val,
672 gr_bes_crop_debug3_blendopt_fill_override_m(),
673 gr_bes_crop_debug3_blendopt_fill_override_enabled_f());
674 } else {
675 val = set_field(val,
676 gr_bes_crop_debug3_blendopt_read_suppress_m(),
677 gr_bes_crop_debug3_blendopt_read_suppress_disabled_f());
678 val = set_field(val,
679 gr_bes_crop_debug3_blendopt_fill_override_m(),
680 gr_bes_crop_debug3_blendopt_fill_override_disabled_f());
681 }
682 gk20a_writel(g, gr_bes_crop_debug3_r(), val);
683}
684
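/*
 * Dispatch SW methods for the PASCAL_A and PASCAL_COMPUTE_A classes. The
 * incoming offset is compared against the NVC097/NVC0C0 method defines
 * after a << 2 shift; unknown methods return -EINVAL.
 */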
685int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
686 u32 class_num, u32 offset, u32 data)
687{
688 gk20a_dbg_fn("");
689
690 if (class_num == PASCAL_COMPUTE_A) {
691 switch (offset << 2) {
692 case NVC0C0_SET_SHADER_EXCEPTIONS:
693 gk20a_gr_set_shader_exceptions(g, data);
694 break;
695 case NVC0C0_SET_RD_COALESCE:
696 gr_gm20b_set_rd_coalesce(g, data);
697 break;
698 default:
699 goto fail;
700 }
701 }
702
703 if (class_num == PASCAL_A) {
704 switch (offset << 2) {
705 case NVC097_SET_SHADER_EXCEPTIONS:
706 gk20a_gr_set_shader_exceptions(g, data);
707 break;
708 case NVC097_SET_CIRCULAR_BUFFER_SIZE:
709 g->ops.gr.set_circular_buffer_size(g, data);
710 break;
711 case NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
712 g->ops.gr.set_alpha_circular_buffer_size(g, data);
713 break;
714 case NVC097_SET_GO_IDLE_TIMEOUT:
715 gr_gp10b_set_go_idle_timeout(g, data);
716 break;
717 case NVC097_SET_COALESCE_BUFFER_SIZE:
718 gr_gp10b_set_coalesce_buffer_size(g, data);
719 break;
720 case NVC097_SET_RD_COALESCE:
721 gr_gm20b_set_rd_coalesce(g, data);
722 break;
723 case NVC097_SET_BES_CROP_DEBUG3:
724 g->ops.gr.set_bes_crop_debug3(g, data);
725 break;
726 default:
727 goto fail;
728 }
729 }
730 return 0;
731
732fail:
733 return -EINVAL;
734}
735
736void gr_gp10b_cb_size_default(struct gk20a *g)
737{
738 struct gr_gk20a *gr = &g->gr;
739
740 if (!gr->attrib_cb_default_size)
741 gr->attrib_cb_default_size = 0x800;
742 gr->alpha_cb_default_size =
743 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
744}
745
746void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
747{
748 struct gr_gk20a *gr = &g->gr;
749 u32 gpc_index, ppc_index, stride, val;
750 u32 pd_ab_max_output;
751 u32 alpha_cb_size = data * 4;
752 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
753 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
754
755 gk20a_dbg_fn("");
756
757 if (alpha_cb_size > gr->alpha_cb_size)
758 alpha_cb_size = gr->alpha_cb_size;
759
760 gk20a_writel(g, gr_ds_tga_constraintlogic_alpha_r(),
761 (gk20a_readl(g, gr_ds_tga_constraintlogic_alpha_r()) &
762 ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
763 gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));
764
765 pd_ab_max_output = alpha_cb_size *
766 gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() /
767 gr_pd_ab_dist_cfg1_max_output_granularity_v();
768
769 if (g->gr.pd_max_batches) {
770 gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
771 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
772 gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches));
773 } else {
774 gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
775 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
776 gr_pd_ab_dist_cfg1_max_batches_init_f());
777 }
778
779 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
780 stride = gpc_stride * gpc_index;
781
782 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
783 ppc_index++) {
784
785 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
786 stride +
787 ppc_in_gpc_stride * ppc_index);
788
789 val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
790 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
791 gr->pes_tpc_count[ppc_index][gpc_index]));
792
793 gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
794 stride +
795 ppc_in_gpc_stride * ppc_index, val);
796 }
797 }
798}
799
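/*
 * Resize the beta (attrib) circular buffer: clamp the request to the
 * allocated size, add the GfxP delta when the context is running with an
 * enlarged beta CB (detected by comparing the CB size and steady-state
 * size registers) and write the per-PPC and SWDX size registers.
 */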
800void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
801{
802 struct gr_gk20a *gr = &g->gr;
803 u32 gpc_index, ppc_index, stride, val;
804 u32 cb_size_steady = data * 4, cb_size;
805 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
806 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
807
808 gk20a_dbg_fn("");
809
810 if (cb_size_steady > gr->attrib_cb_size)
811 cb_size_steady = gr->attrib_cb_size;
812 if (gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) !=
813 gk20a_readl(g,
814 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r())) {
815 cb_size = cb_size_steady +
816 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
817 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
818 } else {
819 cb_size = cb_size_steady;
820 }
821
822 gk20a_writel(g, gr_ds_tga_constraintlogic_beta_r(),
823 (gk20a_readl(g, gr_ds_tga_constraintlogic_beta_r()) &
824 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
825 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady));
826
827 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
828 stride = gpc_stride * gpc_index;
829
830 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
831 ppc_index++) {
832
833 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
834 stride +
835 ppc_in_gpc_stride * ppc_index);
836
837 val = set_field(val,
838 gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
839 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
840 gr->pes_tpc_count[ppc_index][gpc_index]));
841
842 gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
843 stride +
844 ppc_in_gpc_stride * ppc_index, val);
845
846 gk20a_writel(g, ppc_in_gpc_stride * ppc_index +
847 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() +
848 stride,
849 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_f(
850 cb_size_steady));
851
852 val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
853 ppc_index + gpc_index));
854
855 val = set_field(val,
856 gr_gpcs_swdx_tc_beta_cb_size_v_m(),
857 gr_gpcs_swdx_tc_beta_cb_size_v_f(
858 cb_size_steady *
859 gr->gpc_ppc_count[gpc_index]));
860
861 gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
862 ppc_index + gpc_index), val);
863 }
864 }
865}
866
867int gr_gp10b_init_ctx_state(struct gk20a *g)
868{
869 struct fecs_method_op_gk20a op = {
870 .mailbox = { .id = 0, .data = 0,
871 .clr = ~0, .ok = 0, .fail = 0},
872 .method.data = 0,
873 .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
874 .cond.fail = GR_IS_UCODE_OP_SKIP,
875 };
876 int err;
877
878 gk20a_dbg_fn("");
879
880 err = gr_gk20a_init_ctx_state(g);
881 if (err)
882 return err;
883
884 if (!g->gr.t18x.ctx_vars.preempt_image_size) {
885 op.method.addr =
886 gr_fecs_method_push_adr_discover_preemption_image_size_v();
887 op.mailbox.ret = &g->gr.t18x.ctx_vars.preempt_image_size;
888 err = gr_gk20a_submit_fecs_method_op(g, op, false);
889 if (err) {
890 nvgpu_err(g, "query preempt image size failed");
891 return err;
892 }
893 }
894
895 gk20a_dbg_info("preempt image size: %u",
896 g->gr.t18x.ctx_vars.preempt_image_size);
897
898 gk20a_dbg_fn("done");
899
900 return 0;
901}
902
903int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
904 struct nvgpu_mem *mem)
905{
906 int err;
907
908 gk20a_dbg_fn("");
909
910 err = nvgpu_dma_alloc_sys(vm->mm->g, size, mem);
911 if (err)
912 return err;
913
914 mem->gpu_va = nvgpu_gmmu_map(vm,
915 mem,
916 mem->aligned_size,
917 NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE,
918 gk20a_mem_flag_none,
919 false,
920 mem->aperture);
921
922 if (!mem->gpu_va) {
923 err = -ENOMEM;
924 goto fail_free;
925 }
926
927 return 0;
928
929fail_free:
930 nvgpu_dma_free(vm->mm->g, mem);
931 return err;
932}
933
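/*
 * Validate and apply the requested graphics/compute preemption modes.
 * Forced GfxP/CILP settings override the request, downgrades and the
 * GfxP+CILP combination are rejected, and selecting GfxP allocates the
 * preempt, spill, beta CB and pagepool ctxsw buffers.
 */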
934int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
935 struct gr_ctx_desc *gr_ctx,
936 struct vm_gk20a *vm, u32 class,
937 u32 graphics_preempt_mode,
938 u32 compute_preempt_mode)
939{
940 int err = 0;
941
942 if (g->ops.gr.is_valid_gfx_class(g, class) &&
943 g->gr.t18x.ctx_vars.force_preemption_gfxp)
944 graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
945
946 if (g->ops.gr.is_valid_compute_class(g, class) &&
947 g->gr.t18x.ctx_vars.force_preemption_cilp)
948 compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
949
950 /* check for invalid combinations */
951 if ((graphics_preempt_mode == 0) && (compute_preempt_mode == 0))
952 return -EINVAL;
953
954 if ((graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) &&
955 (compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP))
956 return -EINVAL;
957
958 /* Do not allow lower preemption modes than current ones */
959 if (graphics_preempt_mode &&
960 (graphics_preempt_mode < gr_ctx->graphics_preempt_mode))
961 return -EINVAL;
962
963 if (compute_preempt_mode &&
964 (compute_preempt_mode < gr_ctx->compute_preempt_mode))
965 return -EINVAL;
966
967 /* set preemption modes */
968 switch (graphics_preempt_mode) {
969 case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
970 {
971 u32 spill_size =
972 gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
973 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
974 u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
975 gr_scc_pagepool_total_pages_byte_granularity_v();
976 u32 betacb_size = g->gr.attrib_cb_default_size +
977 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
978 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
979 u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
980 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
981 g->gr.max_tpc_count;
982 attrib_cb_size = ALIGN(attrib_cb_size, 128);
983
984 gk20a_dbg_info("gfxp context spill_size=%d", spill_size);
985 gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size);
986 gk20a_dbg_info("gfxp context attrib_cb_size=%d",
987 attrib_cb_size);
988
989 err = gr_gp10b_alloc_buffer(vm,
990 g->gr.t18x.ctx_vars.preempt_image_size,
991 &gr_ctx->t18x.preempt_ctxsw_buffer);
992 if (err) {
993 nvgpu_err(g, "cannot allocate preempt buffer");
994 goto fail;
995 }
996
997 err = gr_gp10b_alloc_buffer(vm,
998 spill_size,
999 &gr_ctx->t18x.spill_ctxsw_buffer);
1000 if (err) {
1001 nvgpu_err(g, "cannot allocate spill buffer");
1002 goto fail_free_preempt;
1003 }
1004
1005 err = gr_gp10b_alloc_buffer(vm,
1006 attrib_cb_size,
1007 &gr_ctx->t18x.betacb_ctxsw_buffer);
1008 if (err) {
1009 nvgpu_err(g, "cannot allocate beta buffer");
1010 goto fail_free_spill;
1011 }
1012
1013 err = gr_gp10b_alloc_buffer(vm,
1014 pagepool_size,
1015 &gr_ctx->t18x.pagepool_ctxsw_buffer);
1016 if (err) {
1017 nvgpu_err(g, "cannot allocate page pool");
1018 goto fail_free_betacb;
1019 }
1020
1021 gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
1022 break;
1023 }
1024
1025 case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
1026 gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
1027 break;
1028
1029 default:
1030 break;
1031 }
1032
1033 if (g->ops.gr.is_valid_compute_class(g, class) ||
1034 g->ops.gr.is_valid_gfx_class(g, class)) {
1035 switch (compute_preempt_mode) {
1036 case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
1037 case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
1038 case NVGPU_PREEMPTION_MODE_COMPUTE_CILP:
1039 gr_ctx->compute_preempt_mode = compute_preempt_mode;
1040 break;
1041 default:
1042 break;
1043 }
1044 }
1045
1046 return 0;
1047
1048fail_free_betacb:
1049 nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
1050fail_free_spill:
1051 nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
1052fail_free_preempt:
1053 nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
1054fail:
1055 return err;
1056}
1057
1058int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
1059 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
1060 u32 class,
1061 u32 flags)
1062{
1063 int err;
1064 u32 graphics_preempt_mode = 0;
1065 u32 compute_preempt_mode = 0;
1066
1067 gk20a_dbg_fn("");
1068
1069 err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags);
1070 if (err)
1071 return err;
1072
1073 (*gr_ctx)->t18x.ctx_id_valid = false;
1074
1075 if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP)
1076 graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
1077 if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP)
1078 compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
1079
1080 if (graphics_preempt_mode || compute_preempt_mode) {
1081 if (g->ops.gr.set_ctxsw_preemption_mode) {
1082 err = g->ops.gr.set_ctxsw_preemption_mode(g, *gr_ctx, vm,
1083 class, graphics_preempt_mode, compute_preempt_mode);
1084 if (err) {
1085 nvgpu_err(g, "set_ctxsw_preemption_mode failed");
1086 goto fail_free_gk20a_ctx;
1087 }
1088 } else
1089 goto fail_free_gk20a_ctx;
1090 }
1091
1092 gk20a_dbg_fn("done");
1093
1094 return 0;
1095
1096fail_free_gk20a_ctx:
1097 gr_gk20a_free_gr_ctx(g, vm, *gr_ctx);
1098 *gr_ctx = NULL;
1099
1100 return err;
1101}
1102
1103static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
1104 struct gr_ctx_desc *gr_ctx)
1105{
1106 struct nvgpu_mem *mem = &gr_ctx->mem;
1107
1108 if (nvgpu_mem_begin(g, mem)) {
1109 WARN_ON("Cannot map context");
1110 return;
1111 }
1112 nvgpu_err(g, "ctxsw_prog_main_image_magic_value_o : %x (expect %x)",
1113 nvgpu_mem_rd(g, mem,
1114 ctxsw_prog_main_image_magic_value_o()),
1115 ctxsw_prog_main_image_magic_value_v_value_v());
1116
1117 nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x",
1118 nvgpu_mem_rd(g, mem,
1119 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o()));
1120
1121 nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x",
1122 nvgpu_mem_rd(g, mem,
1123 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o()));
1124
1125 nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_control : %x",
1126 nvgpu_mem_rd(g, mem,
1127 ctxsw_prog_main_image_context_timestamp_buffer_control_o()));
1128
1129 nvgpu_err(g, "NUM_SAVE_OPERATIONS : %d",
1130 nvgpu_mem_rd(g, mem,
1131 ctxsw_prog_main_image_num_save_ops_o()));
1132 nvgpu_err(g, "WFI_SAVE_OPERATIONS : %d",
1133 nvgpu_mem_rd(g, mem,
1134 ctxsw_prog_main_image_num_wfi_save_ops_o()));
1135 nvgpu_err(g, "CTA_SAVE_OPERATIONS : %d",
1136 nvgpu_mem_rd(g, mem,
1137 ctxsw_prog_main_image_num_cta_save_ops_o()));
1138 nvgpu_err(g, "GFXP_SAVE_OPERATIONS : %d",
1139 nvgpu_mem_rd(g, mem,
1140 ctxsw_prog_main_image_num_gfxp_save_ops_o()));
1141 nvgpu_err(g, "CILP_SAVE_OPERATIONS : %d",
1142 nvgpu_mem_rd(g, mem,
1143 ctxsw_prog_main_image_num_cilp_save_ops_o()));
1144 nvgpu_err(g,
1145 "image gfx preemption option (GFXP is 1) %x",
1146 nvgpu_mem_rd(g, mem,
1147 ctxsw_prog_main_image_graphics_preemption_options_o()));
1148 nvgpu_err(g,
1149 "image compute preemption option (CTA is 1) %x",
1150 nvgpu_mem_rd(g, mem,
1151 ctxsw_prog_main_image_compute_preemption_options_o()));
1152 nvgpu_mem_end(g, mem);
1153}
1154
1155void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
1156 struct gr_ctx_desc *gr_ctx)
1157{
1158 gk20a_dbg_fn("");
1159
1160 if (!gr_ctx)
1161 return;
1162
1163 if (g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close)
1164 dump_ctx_switch_stats(g, vm, gr_ctx);
1165
1166 nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
1167 nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
1168 nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
1169 nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
1170 gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
1171 gk20a_dbg_fn("done");
1172}
1173
1174
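/*
 * Write the graphics (GfxP) and compute (CILP/CTA) preemption options
 * into the ctxsw image. If a preemption buffer exists, also program its
 * VA and patch the beta CB, pagepool and spill buffer addresses/sizes
 * along with the CBES reserve values.
 */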
1175void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
1176 struct channel_ctx_gk20a *ch_ctx,
1177 struct nvgpu_mem *mem)
1178{
1179 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
1180 struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
1181 struct nvgpu_mem *ctxheader = &ctx->mem;
1182
1183 u32 gfxp_preempt_option =
1184 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
1185 u32 cilp_preempt_option =
1186 ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
1187 u32 cta_preempt_option =
1188 ctxsw_prog_main_image_compute_preemption_options_control_cta_f();
1189 int err;
1190
1191 gk20a_dbg_fn("");
1192
1193 if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
1194 gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
1195 nvgpu_mem_wr(g, mem,
1196 ctxsw_prog_main_image_graphics_preemption_options_o(),
1197 gfxp_preempt_option);
1198 }
1199
1200 if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP) {
1201 gk20a_dbg_info("CILP: %x", cilp_preempt_option);
1202 nvgpu_mem_wr(g, mem,
1203 ctxsw_prog_main_image_compute_preemption_options_o(),
1204 cilp_preempt_option);
1205 }
1206
1207 if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
1208 gk20a_dbg_info("CTA: %x", cta_preempt_option);
1209 nvgpu_mem_wr(g, mem,
1210 ctxsw_prog_main_image_compute_preemption_options_o(),
1211 cta_preempt_option);
1212 }
1213
1214 if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) {
1215 u32 addr;
1216 u32 size;
1217 u32 cbes_reserve;
1218
1219 if (g->ops.gr.set_preemption_buffer_va) {
1220 if (ctxheader->gpu_va)
1221 g->ops.gr.set_preemption_buffer_va(g, ctxheader,
1222 gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va);
1223 else
1224 g->ops.gr.set_preemption_buffer_va(g, mem,
1225 gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va);
1226 }
1227
1228 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
1229 if (err) {
1230 nvgpu_err(g, "can't map patch context");
1231 goto out;
1232 }
1233
1234 addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >>
1235 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
1236 (u64_hi32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) <<
1237 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
1238
1239 gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
1240 g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
1241
1242 addr = (u64_lo32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) >>
1243 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
1244 (u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
1245 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
1246 size = gr_ctx->t18x.pagepool_ctxsw_buffer.size;
1247
1248 if (size == g->ops.gr.pagepool_default_size(g))
1249 size = gr_scc_pagepool_total_pages_hwmax_v();
1250
1251 g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
1252
1253 addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >>
1254 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
1255 (u64_hi32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) <<
1256 (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
1257 size = gr_ctx->t18x.spill_ctxsw_buffer.size /
1258 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
1259
1260 gr_gk20a_ctx_patch_write(g, ch_ctx,
1261 gr_gpc0_swdx_rm_spill_buffer_addr_r(),
1262 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
1263 true);
1264 gr_gk20a_ctx_patch_write(g, ch_ctx,
1265 gr_gpc0_swdx_rm_spill_buffer_size_r(),
1266 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
1267 true);
1268
1269 cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
1270 gr_gk20a_ctx_patch_write(g, ch_ctx,
1271 gr_gpcs_swdx_beta_cb_ctrl_r(),
1272 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
1273 cbes_reserve),
1274 true);
1275 gr_gk20a_ctx_patch_write(g, ch_ctx,
1276 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
1277 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
1278 cbes_reserve),
1279 true);
1280
1281 gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
1282 }
1283
1284out:
1285 gk20a_dbg_fn("done");
1286}
1287
1288int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
1289 struct gk20a_debug_output *o)
1290{
1291 struct gr_gk20a *gr = &g->gr;
1292 u32 gr_engine_id;
1293
1294 gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
1295
1296 gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n",
1297 gk20a_readl(g, gr_status_r()));
1298 gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n",
1299 gk20a_readl(g, gr_status_1_r()));
1300 gk20a_debug_output(o, "NV_PGRAPH_STATUS2: 0x%x\n",
1301 gk20a_readl(g, gr_status_2_r()));
1302 gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n",
1303 gk20a_readl(g, gr_engine_status_r()));
1304 gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n",
1305 gk20a_readl(g, gr_gpfifo_status_r()));
1306 gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n",
1307 gk20a_readl(g, gr_gpfifo_ctl_r()));
1308 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n",
1309 gk20a_readl(g, gr_fecs_host_int_status_r()));
1310 gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION : 0x%x\n",
1311 gk20a_readl(g, gr_exception_r()));
1312 gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n",
1313 gk20a_readl(g, gr_fecs_intr_r()));
1314 gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n",
1315 gk20a_readl(g, fifo_engine_status_r(gr_engine_id)));
1316 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n",
1317 gk20a_readl(g, gr_activity_0_r()));
1318 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n",
1319 gk20a_readl(g, gr_activity_1_r()));
1320 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY2: 0x%x\n",
1321 gk20a_readl(g, gr_activity_2_r()));
1322 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n",
1323 gk20a_readl(g, gr_activity_4_r()));
1324 gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n",
1325 gk20a_readl(g, gr_pri_sked_activity_r()));
1326 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n",
1327 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r()));
1328 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n",
1329 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r()));
1330 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n",
1331 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r()));
1332 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n",
1333 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
1334 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1335 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
1336 if (gr->gpc_tpc_count && gr->gpc_tpc_count[0] == 2)
1337 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1338 gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
1339 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1340 gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r()));
1341 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n",
1342 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r()));
1343 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n",
1344 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r()));
1345 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n",
1346 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r()));
1347 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n",
1348 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
1349 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1350 gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
1351 if (gr->gpc_tpc_count && gr->gpc_tpc_count[0] == 2)
1352 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1353 gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
1354 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1355 gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r()));
1356 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n",
1357 gk20a_readl(g, gr_pri_be0_becs_be_activity0_r()));
1358 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n",
1359 gk20a_readl(g, gr_pri_be1_becs_be_activity0_r()));
1360 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n",
1361 gk20a_readl(g, gr_pri_bes_becs_be_activity0_r()));
1362 gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n",
1363 gk20a_readl(g, gr_pri_ds_mpipe_status_r()));
1364 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n",
1365 gk20a_readl(g, gr_fe_go_idle_timeout_r()));
1366 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n",
1367 gk20a_readl(g, gr_pri_fe_go_idle_info_r()));
1368 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n",
1369 gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r()));
1370 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n",
1371 gk20a_readl(g, gr_cwd_fs_r()));
1372 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS: 0x%x\n",
1373 gk20a_readl(g, gr_fe_tpc_fs_r()));
1374 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID(0): 0x%x\n",
1375 gk20a_readl(g, gr_cwd_gpc_tpc_id_r(0)));
1376 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n",
1377 gk20a_readl(g, gr_cwd_sm_id_r(0)));
1378 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n",
1379 gk20a_readl(g, gr_fecs_ctxsw_status_fe_0_r()));
1380 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n",
1381 gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
1382 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n",
1383 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r()));
1384 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n",
1385 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r()));
1386 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n",
1387 gk20a_readl(g, gr_fecs_ctxsw_idlestate_r()));
1388 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n",
1389 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r()));
1390 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n",
1391 gk20a_readl(g, gr_fecs_current_ctx_r()));
1392 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n",
1393 gk20a_readl(g, gr_fecs_new_ctx_r()));
1394 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n",
1395 gk20a_readl(g, gr_pri_be0_crop_status1_r()));
1396 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n",
1397 gk20a_readl(g, gr_pri_bes_crop_status1_r()));
1398 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS : 0x%x\n",
1399 gk20a_readl(g, gr_pri_be0_zrop_status_r()));
1400 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS2 : 0x%x\n",
1401 gk20a_readl(g, gr_pri_be0_zrop_status2_r()));
1402 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS : 0x%x\n",
1403 gk20a_readl(g, gr_pri_bes_zrop_status_r()));
1404 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n",
1405 gk20a_readl(g, gr_pri_bes_zrop_status2_r()));
1406 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION: 0x%x\n",
1407 gk20a_readl(g, gr_pri_be0_becs_be_exception_r()));
1408 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN: 0x%x\n",
1409 gk20a_readl(g, gr_pri_be0_becs_be_exception_en_r()));
1410 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n",
1411 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r()));
1412 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n",
1413 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r()));
1414 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n",
1415 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
1416 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
1417 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
1418 return 0;
1419}
1420
1421static bool gr_activity_empty_or_preempted(u32 val)
1422{
1423 while(val) {
1424 u32 v = val & 7;
1425 if (v != gr_activity_4_gpc0_empty_v() &&
1426 v != gr_activity_4_gpc0_preempted_v())
1427 return false;
1428 val >>= 3;
1429 }
1430
1431 return true;
1432}
1433
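/*
 * Poll the PGRAPH status and activity registers until the engine is
 * disabled, or the context switch is idle and every activity unit reports
 * empty or preempted. The poll interval backs off exponentially up to
 * GR_IDLE_CHECK_MAX; returns -EAGAIN on timeout.
 */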
1434int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
1435 u32 expect_delay)
1436{
1437 u32 delay = expect_delay;
1438 bool gr_enabled;
1439 bool ctxsw_active;
1440 bool gr_busy;
1441 u32 gr_status;
1442 u32 activity0, activity1, activity2, activity4;
1443 struct nvgpu_timeout timeout;
1444
1445 gk20a_dbg_fn("");
1446
1447 nvgpu_timeout_init(g, &timeout, duration_ms, NVGPU_TIMER_CPU_TIMER);
1448
1449 do {
1450 /* fmodel: host gets fifo_engine_status(gr) from gr
1451 only when gr_status is read */
1452 gr_status = gk20a_readl(g, gr_status_r());
1453
1454 gr_enabled = gk20a_readl(g, mc_enable_r()) &
1455 mc_enable_pgraph_enabled_f();
1456
1457 ctxsw_active = gr_status & 1<<7;
1458
1459 activity0 = gk20a_readl(g, gr_activity_0_r());
1460 activity1 = gk20a_readl(g, gr_activity_1_r());
1461 activity2 = gk20a_readl(g, gr_activity_2_r());
1462 activity4 = gk20a_readl(g, gr_activity_4_r());
1463
1464 gr_busy = !(gr_activity_empty_or_preempted(activity0) &&
1465 gr_activity_empty_or_preempted(activity1) &&
1466 activity2 == 0 &&
1467 gr_activity_empty_or_preempted(activity4));
1468
1469 if (!gr_enabled || (!gr_busy && !ctxsw_active)) {
1470 gk20a_dbg_fn("done");
1471 return 0;
1472 }
1473
1474 nvgpu_usleep_range(delay, delay * 2);
1475 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
1476 } while (!nvgpu_timeout_expired(&timeout));
1477
1478 nvgpu_err(g,
1479 "timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x",
1480 ctxsw_active, gr_busy, activity0, activity1, activity2, activity4);
1481
1482 return -EAGAIN;
1483}
1484
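/*
 * Patch the attribute CB base/size on top of the gm20b programming. The
 * size comes from the GfxP beta CB ctxsw buffer when one is allocated,
 * otherwise from the default global context buffer size, converted to
 * 128-byte units.
 */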
1485void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
1486 struct channel_ctx_gk20a *ch_ctx,
1487 u64 addr, bool patch)
1488{
1489 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
1490 int attrBufferSize;
1491
1492 if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va)
1493 attrBufferSize = gr_ctx->t18x.betacb_ctxsw_buffer.size;
1494 else
1495 attrBufferSize = g->ops.gr.calc_global_ctx_buffer_size(g);
1496
1497 attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
1498
1499 gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch);
1500
1501 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
1502 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
1503 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
1504
1505 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
1506 gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch);
1507
1508 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
1509 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) |
1510 gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
1511}
1512
1513void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
1514 struct channel_ctx_gk20a *ch_ctx,
1515 u64 addr, u64 size, bool patch)
1516{
1517 u32 data;
1518
1519 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
1520 gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
1521
1522 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
1523 gr_scc_bundle_cb_size_div_256b_f(size) |
1524 gr_scc_bundle_cb_size_valid_true_f(), patch);
1525
1526 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
1527 gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);
1528
1529 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
1530 gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
1531 gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
1532
1533 /* data for state_limit */
1534 data = (g->gr.bundle_cb_default_size *
1535 gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
1536 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
1537
1538 data = min_t(u32, data, g->gr.min_gpm_fifo_depth);
1539
1540 gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
1541 g->gr.bundle_cb_token_limit, data);
1542
1543 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
1544 gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
1545 gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
1546}
1547
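/*
 * Program the CWD GPC/TPC ID and SM ID tables from the SM-to-cluster
 * mapping; four TPC entries are packed into each GPC_TPC_ID register.
 */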
1548int gr_gp10b_load_smid_config(struct gk20a *g)
1549{
1550 u32 *tpc_sm_id;
1551 u32 i, j;
1552 u32 tpc_index, gpc_index;
1553 u32 max_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
1554
1555 tpc_sm_id = nvgpu_kcalloc(g, gr_cwd_sm_id__size_1_v(), sizeof(u32));
1556 if (!tpc_sm_id)
1557 return -ENOMEM;
1558
1559 /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
1560 for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) {
1561 u32 reg = 0;
1562 u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
1563 gr_cwd_gpc_tpc_id_tpc0_s();
1564
1565 for (j = 0; j < 4; j++) {
1566 u32 sm_id = (i * 4) + j;
1567 u32 bits;
1568
1569 if (sm_id >= g->gr.tpc_count)
1570 break;
1571
1572 gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
1573 tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
1574
1575 bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
1576 gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
1577 reg |= bits << (j * bit_stride);
1578
1579 tpc_sm_id[gpc_index + max_gpcs * ((tpc_index & 4) >> 2)]
1580 |= sm_id << (bit_stride * (tpc_index & 3));
1581 }
1582 gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
1583 }
1584
1585 for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
1586 gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
1587
1588 nvgpu_kfree(g, tpc_sm_id);
1589
1590 return 0;
1591}
1592
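/*
 * GP10B floorsweeping state init: set the SM TEXIO out-of-range address
 * check mode, disable RE suppression in the SM disp ctrl, apply any FECS
 * ECC feature override and then run the common gm20b FS state init.
 */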
1593int gr_gp10b_init_fs_state(struct gk20a *g)
1594{
1595 u32 data;
1596
1597 gk20a_dbg_fn("");
1598
1599 data = gk20a_readl(g, gr_gpcs_tpcs_sm_texio_control_r());
1600 data = set_field(data, gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(),
1601 gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f());
1602 gk20a_writel(g, gr_gpcs_tpcs_sm_texio_control_r(), data);
1603
1604 data = gk20a_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r());
1605 data = set_field(data, gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_m(),
1606 gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f());
1607 gk20a_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data);
1608
1609 if (g->gr.t18x.fecs_feature_override_ecc_val != 0) {
1610 gk20a_writel(g,
1611 gr_fecs_feature_override_ecc_r(),
1612 g->gr.t18x.fecs_feature_override_ecc_val);
1613 }
1614
1615 return gr_gm20b_init_fs_state(g);
1616}
1617
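/*
 * Program the TPC floorsweeping for a GPC through the SW fuse bypass.
 * The fuse holds a TPC *disable* mask, the complement of the enable
 * mask handled here for a two-TPC GPC: 0x1 (TPC0 only) fuses off TPC1
 * (0x2), 0x2 fuses off TPC0 (0x1), anything else leaves both enabled.
 */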
1618void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
1619{
1620 nvgpu_tegra_fuse_write_bypass(g, 0x1);
1621 nvgpu_tegra_fuse_write_access_sw(g, 0x0);
1622
1623 if (g->gr.gpc_tpc_mask[gpc_index] == 0x1)
1624 nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x2);
1625 else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2)
1626 nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1);
1627 else
1628 nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0);
1629}
1630
1631void gr_gp10b_get_access_map(struct gk20a *g,
1632 u32 **whitelist, int *num_entries)
1633{
1634 static u32 wl_addr_gp10b[] = {
1635 /* this list must be sorted (low to high) */
1636 0x404468, /* gr_pri_mme_max_instructions */
1637 0x418300, /* gr_pri_gpcs_rasterarb_line_class */
1638 0x418800, /* gr_pri_gpcs_setup_debug */
1639 0x418e00, /* gr_pri_gpcs_swdx_config */
1640 0x418e40, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1641 0x418e44, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1642 0x418e48, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1643 0x418e4c, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1644 0x418e50, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1645 0x418e58, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1646 0x418e5c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1647 0x418e60, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1648 0x418e64, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1649 0x418e68, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1650 0x418e6c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1651 0x418e70, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1652 0x418e74, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1653 0x418e78, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1654 0x418e7c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1655 0x418e80, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1656 0x418e84, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1657 0x418e88, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1658 0x418e8c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1659 0x418e90, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1660 0x418e94, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1661 0x419864, /* gr_pri_gpcs_tpcs_pe_l2_evict_policy */
1662 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */
1663 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */
1664 0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */
1665 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */
1666 };
1667
1668 *whitelist = wl_addr_gp10b;
1669 *num_entries = ARRAY_SIZE(wl_addr_gp10b);
1670}
1671
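/*
 * CILP helper: disable the faulting channel (or its TSG), restart its
 * runlist so it is no longer scheduled, then issue a preempt so the
 * context can be saved out.
 */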
1672static int gr_gp10b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a *fault_ch)
1673{
1674 int ret = 0;
1675
1676 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
1677
1678 ret = gk20a_disable_channel_tsg(g, fault_ch);
1679 if (ret) {
1680 nvgpu_err(g,
1681 "CILP: failed to disable channel/TSG!");
1682 return ret;
1683 }
1684
1685 ret = g->ops.fifo.update_runlist(g, fault_ch->runlist_id, ~0, true, false);
1686 if (ret) {
1687			nvgpu_err(g,
1688				"CILP: failed to restart runlist!");
1689 return ret;
1690 }
1691
1692 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: restarted runlist");
1693
1694 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1695 "CILP: tsgid: 0x%x", fault_ch->tsgid);
1696
1697 if (gk20a_is_channel_marked_as_tsg(fault_ch)) {
1698 gk20a_fifo_issue_preempt(g, fault_ch->tsgid, true);
1699 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1700 "CILP: preempted tsg");
1701 } else {
1702 gk20a_fifo_issue_preempt(g, fault_ch->chid, false);
1703 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1704 "CILP: preempted channel");
1705 }
1706
1707 return ret;
1708}
1709
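/*
 * Arm a CILP preempt for the faulting channel: look up its FECS context
 * id, ask FECS (via the sideband mailbox method) to raise a ctxsw
 * interrupt when the save completes, disable and preempt the channel,
 * and record the pending state so the FECS interrupt handler can finish
 * the sequence.
 */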
1710int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g,
1711 struct channel_gk20a *fault_ch)
1712{
1713 int ret;
1714 struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
1715
1716 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
1717
1718 if (!gr_ctx)
1719 return -EINVAL;
1720
1721 if (gr_ctx->t18x.cilp_preempt_pending) {
1722 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1723 "CILP is already pending for chid %d",
1724 fault_ch->chid);
1725 return 0;
1726 }
1727
1728 /* get ctx_id from the ucode image */
1729 if (!gr_ctx->t18x.ctx_id_valid) {
1730 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1731 "CILP: looking up ctx id");
1732 ret = gr_gk20a_get_ctx_id(g, fault_ch, &gr_ctx->t18x.ctx_id);
1733 if (ret) {
1734 nvgpu_err(g, "CILP: error looking up ctx id!");
1735 return ret;
1736 }
1737 gr_ctx->t18x.ctx_id_valid = true;
1738 }
1739
1740 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1741 "CILP: ctx id is 0x%x", gr_ctx->t18x.ctx_id);
1742
1743 /* send ucode method to set ctxsw interrupt */
1744 ret = gr_gk20a_submit_fecs_sideband_method_op(g,
1745 (struct fecs_method_op_gk20a) {
1746 .method.data = gr_ctx->t18x.ctx_id,
1747 .method.addr =
1748 gr_fecs_method_push_adr_configure_interrupt_completion_option_v(),
1749 .mailbox = {
1750 .id = 1 /* sideband */, .data = 0,
1751 .clr = ~0, .ret = NULL,
1752 .ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
1753 .fail = 0},
1754 .cond.ok = GR_IS_UCODE_OP_EQUAL,
1755 .cond.fail = GR_IS_UCODE_OP_SKIP});
1756
1757 if (ret) {
1758 nvgpu_err(g, "CILP: failed to enable ctxsw interrupt!");
1759 return ret;
1760 }
1761
1762 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1763 "CILP: enabled ctxsw completion interrupt");
1764
1765 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1766 "CILP: disabling channel %d",
1767 fault_ch->chid);
1768
1769 ret = gr_gp10b_disable_channel_or_tsg(g, fault_ch);
1770 if (ret) {
1771 nvgpu_err(g, "CILP: failed to disable channel!!");
1772 return ret;
1773 }
1774
1775 /* set cilp_preempt_pending = true and record the channel */
1776 gr_ctx->t18x.cilp_preempt_pending = true;
1777 g->gr.t18x.cilp_preempt_pending_chid = fault_ch->chid;
1778
1779 if (gk20a_is_channel_marked_as_tsg(fault_ch)) {
1780 struct tsg_gk20a *tsg = &g->fifo.tsg[fault_ch->tsgid];
1781
1782 gk20a_tsg_event_id_post_event(tsg,
1783 NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED);
1784 } else {
1785 gk20a_channel_event_id_post_event(fault_ch,
1786 NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED);
1787 }
1788
1789 return 0;
1790}
1791
1792static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g,
1793 struct channel_gk20a *fault_ch)
1794{
1795 struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
1796
1797 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
1798
1799 if (!gr_ctx)
1800 return -EINVAL;
1801
1802 /* The ucode is self-clearing, so all we need to do here is
1803 to clear cilp_preempt_pending. */
1804 if (!gr_ctx->t18x.cilp_preempt_pending) {
1805 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1806 "CILP is already cleared for chid %d\n",
1807 fault_ch->chid);
1808 return 0;
1809 }
1810
1811 gr_ctx->t18x.cilp_preempt_pending = false;
1812 g->gr.t18x.cilp_preempt_pending_chid = -1;
1813
1814 return 0;
1815}
1816
1817/* @brief Pre-process SM exceptions to decide whether they should be cleared here.
1818 *
1819 * On Pascal, if the channel is in CILP preemption mode, preempt the channel and handle the errors with CILP-specific processing.
1820 */
1821int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
1822 u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr,
1823 bool sm_debugger_attached, struct channel_gk20a *fault_ch,
1824 bool *early_exit, bool *ignore_debugger)
1825{
1826 int ret;
1827 bool cilp_enabled = false;
1828 u32 global_mask = 0, dbgr_control0, global_esr_copy;
1829 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1830 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
1831 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
1832
1833 *early_exit = false;
1834 *ignore_debugger = false;
1835
1836 if (fault_ch)
1837 cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode ==
1838 NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
1839
1840	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM exception received on gpc %d tpc %d, global_esr = 0x%08x\n",
1841			gpc, tpc, global_esr);
1842
1843 if (cilp_enabled && sm_debugger_attached) {
1844 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f())
1845 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
1846 gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f());
1847
1848 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f())
1849 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
1850 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f());
1851
1852 global_mask = gr_gpc0_tpc0_sm_hww_global_esr_sm_to_sm_fault_pending_f() |
1853 gr_gpcs_tpcs_sm_hww_global_esr_l1_error_pending_f() |
1854 gr_gpcs_tpcs_sm_hww_global_esr_multiple_warp_errors_pending_f() |
1855 gr_gpcs_tpcs_sm_hww_global_esr_physical_stack_overflow_error_pending_f() |
1856 gr_gpcs_tpcs_sm_hww_global_esr_timeout_error_pending_f() |
1857 gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f();
1858
1859 if (warp_esr != 0 || (global_esr & global_mask) != 0) {
1860 *ignore_debugger = true;
1861
1862 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1863 "CILP: starting wait for LOCKED_DOWN on gpc %d tpc %d\n",
1864 gpc, tpc);
1865
1866 if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch)) {
1867 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1868 "CILP: Broadcasting STOP_TRIGGER from gpc %d tpc %d\n",
1869 gpc, tpc);
1870 g->ops.gr.suspend_all_sms(g, global_mask, false);
1871
1872 gk20a_dbg_gpu_clear_broadcast_stop_trigger(fault_ch);
1873 } else {
1874 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1875 "CILP: STOP_TRIGGER from gpc %d tpc %d\n",
1876 gpc, tpc);
1877 g->ops.gr.suspend_single_sm(g, gpc, tpc, sm, global_mask, true);
1878 }
1879
1880 /* reset the HWW errors after locking down */
1881 global_esr_copy = g->ops.gr.get_sm_hww_global_esr(g,
1882 gpc, tpc, sm);
1883 g->ops.gr.clear_sm_hww(g,
1884 gpc, tpc, sm, global_esr_copy);
1885 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1886 "CILP: HWWs cleared for gpc %d tpc %d\n",
1887 gpc, tpc);
1888
1889 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n");
1890 ret = gr_gp10b_set_cilp_preempt_pending(g, fault_ch);
1891 if (ret) {
1892 nvgpu_err(g, "CILP: error while setting CILP preempt pending!");
1893 return ret;
1894 }
1895
1896 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
1897 if (dbgr_control0 & gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_enable_f()) {
1898 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1899 "CILP: clearing SINGLE_STEP_MODE before resume for gpc %d tpc %d\n",
1900 gpc, tpc);
1901 dbgr_control0 = set_field(dbgr_control0,
1902 gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_m(),
1903 gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_disable_f());
1904 gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
1905 }
1906
1907 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1908 "CILP: resume for gpc %d tpc %d\n",
1909 gpc, tpc);
1910 g->ops.gr.resume_single_sm(g, gpc, tpc, sm);
1911
1912 *ignore_debugger = true;
1913 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: All done on gpc %d, tpc %d\n", gpc, tpc);
1914 }
1915
1916 *early_exit = true;
1917 }
1918 return 0;
1919}
1920
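/*
 * Return the channel id recorded when the CILP preempt was armed,
 * provided the preempt is still marked pending on that channel's
 * context; -EINVAL otherwise.
 */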
1921static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
1922{
1923 struct gr_ctx_desc *gr_ctx;
1924 struct channel_gk20a *ch;
1925 int chid;
1926 int ret = -EINVAL;
1927
1928 chid = g->gr.t18x.cilp_preempt_pending_chid;
1929
1930 ch = gk20a_channel_get(gk20a_fifo_channel_from_chid(g, chid));
1931 if (!ch)
1932 return ret;
1933
1934 gr_ctx = ch->ch_ctx.gr_ctx;
1935
1936 if (gr_ctx->t18x.cilp_preempt_pending) {
1937 *__chid = chid;
1938 ret = 0;
1939 }
1940
1941 gk20a_channel_put(ch);
1942
1943 return ret;
1944}
1945
1946int gr_gp10b_handle_fecs_error(struct gk20a *g,
1947 struct channel_gk20a *__ch,
1948 struct gr_gk20a_isr_data *isr_data)
1949{
1950 u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
1951 struct channel_gk20a *ch;
1952 int chid = -1;
1953 int ret = 0;
1954
1955 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
1956
1957 /*
1958 * INTR1 (bit 1 of the HOST_INT_STATUS_CTXSW_INTR)
1959 * indicates that a CILP ctxsw save has finished
1960 */
1961 if (gr_fecs_intr & gr_fecs_host_int_status_ctxsw_intr_f(2)) {
1962 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1963 "CILP: ctxsw save completed!\n");
1964
1965 /* now clear the interrupt */
1966 gk20a_writel(g, gr_fecs_host_int_clear_r(),
1967 gr_fecs_host_int_clear_ctxsw_intr1_clear_f());
1968
1969 ret = gr_gp10b_get_cilp_preempt_pending_chid(g, &chid);
1970 if (ret)
1971 goto clean_up;
1972
1973 ch = gk20a_channel_get(
1974 gk20a_fifo_channel_from_chid(g, chid));
1975 if (!ch)
1976 goto clean_up;
1977
1978
1979 /* set preempt_pending to false */
1980 ret = gr_gp10b_clear_cilp_preempt_pending(g, ch);
1981 if (ret) {
1982 nvgpu_err(g, "CILP: error while unsetting CILP preempt pending!");
1983 gk20a_channel_put(ch);
1984 goto clean_up;
1985 }
1986
1987 /* Post events to UMD */
1988 gk20a_dbg_gpu_post_events(ch);
1989
1990 if (gk20a_is_channel_marked_as_tsg(ch)) {
1991 struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
1992
1993 gk20a_tsg_event_id_post_event(tsg,
1994 NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE);
1995 } else {
1996 gk20a_channel_event_id_post_event(ch,
1997 NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE);
1998 }
1999
2000 gk20a_channel_put(ch);
2001 }
2002
2003clean_up:
2004 /* handle any remaining interrupts */
2005 return gk20a_gr_handle_fecs_error(g, __ch, isr_data);
2006}
2007
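/*
 * Read the SM warp error status register; if the reported error address
 * is not valid, clear the error type field to NONE so a stale address
 * is not interpreted by callers.
 */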
2008u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g,
2009 u32 gpc, u32 tpc, u32 sm)
2010{
2011 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
2012 u32 hww_warp_esr = gk20a_readl(g,
2013 gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
2014
2015 if (!(hww_warp_esr & gr_gpc0_tpc0_sm_hww_warp_esr_addr_valid_m()))
2016 hww_warp_esr = set_field(hww_warp_esr,
2017 gr_gpc0_tpc0_sm_hww_warp_esr_addr_error_type_m(),
2018 gr_gpc0_tpc0_sm_hww_warp_esr_addr_error_type_none_f());
2019
2020 return hww_warp_esr;
2021}
2022
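/*
 * Return the FECS ECC feature override register value when the ECC
 * enable fuse is set; otherwise return 0 (no override).
 */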
2023u32 get_ecc_override_val(struct gk20a *g)
2024{
2025 u32 val;
2026
2027 val = gk20a_readl(g, fuse_opt_ecc_en_r());
2028 if (val)
2029 return gk20a_readl(g, gr_fecs_feature_override_ecc_r());
2030
2031 return 0;
2032}
2033
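/*
 * Suspend a single context on behalf of the debugger. A resident
 * context has its SMs suspended; if it uses CILP, a CILP preempt is
 * armed and the SMs are resumed so the preempt can complete, with
 * *cilp_preempt_pending telling the caller to wait for the save.
 * Non-resident channels are simply disabled.
 */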
2034static bool gr_gp10b_suspend_context(struct channel_gk20a *ch,
2035 bool *cilp_preempt_pending)
2036{
2037 struct gk20a *g = ch->g;
2038 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
2039 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
2040 bool ctx_resident = false;
2041 int err = 0;
2042
2043 *cilp_preempt_pending = false;
2044
2045 if (gk20a_is_channel_ctx_resident(ch)) {
2046 g->ops.gr.suspend_all_sms(g, 0, false);
2047
2048 if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP) {
2049 err = gr_gp10b_set_cilp_preempt_pending(g, ch);
2050 if (err)
2051 nvgpu_err(g, "unable to set CILP preempt pending");
2052 else
2053 *cilp_preempt_pending = true;
2054
2055 g->ops.gr.resume_all_sms(g);
2056 }
2057
2058 ctx_resident = true;
2059 } else {
2060 gk20a_disable_channel_tsg(g, ch);
2061 }
2062
2063 return ctx_resident;
2064}
2065
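/*
 * Suspend every context attached to the debug session. Context
 * switching is disabled while the per-channel suspends run; if one of
 * them armed a CILP preempt, poll (with exponential backoff up to
 * GR_IDLE_CHECK_MAX) until the pending flag clears or the GR idle
 * timeout expires.
 */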
2066int gr_gp10b_suspend_contexts(struct gk20a *g,
2067 struct dbg_session_gk20a *dbg_s,
2068 int *ctx_resident_ch_fd)
2069{
2070 u32 delay = GR_IDLE_CHECK_DEFAULT;
2071 bool cilp_preempt_pending = false;
2072 struct channel_gk20a *cilp_preempt_pending_ch = NULL;
2073 struct channel_gk20a *ch;
2074 struct dbg_session_channel_data *ch_data;
2075 int err = 0;
2076 int local_ctx_resident_ch_fd = -1;
2077 bool ctx_resident;
2078
2079 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
2080
2081 err = gr_gk20a_disable_ctxsw(g);
2082 if (err) {
2083 nvgpu_err(g, "unable to stop gr ctxsw");
2084 nvgpu_mutex_release(&g->dbg_sessions_lock);
2085 goto clean_up;
2086 }
2087
2088 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
2089
2090 list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry) {
2091 ch = g->fifo.channel + ch_data->chid;
2092
2093 ctx_resident = gr_gp10b_suspend_context(ch,
2094 &cilp_preempt_pending);
2095 if (ctx_resident)
2096 local_ctx_resident_ch_fd = ch_data->channel_fd;
2097 if (cilp_preempt_pending)
2098 cilp_preempt_pending_ch = ch;
2099 }
2100
2101 nvgpu_mutex_release(&dbg_s->ch_list_lock);
2102
2103 err = gr_gk20a_enable_ctxsw(g);
2104 if (err) {
2105 nvgpu_mutex_release(&g->dbg_sessions_lock);
2106 goto clean_up;
2107 }
2108
2109 nvgpu_mutex_release(&g->dbg_sessions_lock);
2110
2111 if (cilp_preempt_pending_ch) {
2112 struct channel_ctx_gk20a *ch_ctx =
2113 &cilp_preempt_pending_ch->ch_ctx;
2114 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
2115 struct nvgpu_timeout timeout;
2116
2117 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
2118 "CILP preempt pending, waiting %lu msecs for preemption",
2119 gk20a_get_gr_idle_timeout(g));
2120
2121 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
2122 NVGPU_TIMER_CPU_TIMER);
2123 do {
2124 if (!gr_ctx->t18x.cilp_preempt_pending)
2125 break;
2126
2127 nvgpu_usleep_range(delay, delay * 2);
2128 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
2129 } while (!nvgpu_timeout_expired(&timeout));
2130
2131 /* If cilp is still pending at this point, timeout */
2132 if (gr_ctx->t18x.cilp_preempt_pending)
2133 err = -ETIMEDOUT;
2134 }
2135
2136 *ctx_resident_ch_fd = local_ctx_resident_ch_fd;
2137
2138clean_up:
2139 return err;
2140}
2141
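/*
 * Record the boost setting in the gr context, then, with the channel
 * disabled and preempted, write it into the context image via
 * update_boosted_ctx (if the HAL provides one).
 */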
2142int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
2143 bool boost)
2144{
2145 struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx;
2146 struct gk20a *g = ch->g;
2147 struct nvgpu_mem *mem = &gr_ctx->mem;
2148 int err = 0;
2149
2150 gr_ctx->boosted_ctx = boost;
2151
2152 if (nvgpu_mem_begin(g, mem))
2153 return -ENOMEM;
2154
2155 err = gk20a_disable_channel_tsg(g, ch);
2156 if (err)
2157 goto unmap_ctx;
2158
2159 err = gk20a_fifo_preempt(g, ch);
2160 if (err)
2161 goto enable_ch;
2162
2163 if (g->ops.gr.update_boosted_ctx)
2164 g->ops.gr.update_boosted_ctx(g, mem, gr_ctx);
2165 else
2166 err = -ENOSYS;
2167
2168enable_ch:
2169 gk20a_enable_channel_tsg(g, ch);
2170unmap_ctx:
2171 nvgpu_mem_end(g, mem);
2172
2173 return err;
2174}
2175
2176void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
2177		struct gr_ctx_desc *gr_ctx)
{
2178 u32 v;
2179
2180 v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f(
2181 gr_ctx->boosted_ctx);
2182 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pmu_options_o(), v);
2183}
2184
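/*
 * Change the graphics/compute preemption modes for a channel. Modes
 * already in effect are skipped; the remaining ones are validated
 * against the channel's class and recorded in the gr context, then,
 * with the channel disabled and preempted, the context image is
 * updated and the global CB manager is re-committed through the patch
 * buffer.
 */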
2185int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
2186 u32 graphics_preempt_mode,
2187 u32 compute_preempt_mode)
2188{
2189 struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx;
2190 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
2191 struct gk20a *g = ch->g;
2192 struct tsg_gk20a *tsg;
2193 struct vm_gk20a *vm;
2194 struct nvgpu_mem *mem = &gr_ctx->mem;
2195 struct ctx_header_desc *ctx = &ch->ch_ctx.ctx_header;
2196 struct nvgpu_mem *ctxheader = &ctx->mem;
2197 u32 class;
2198 int err = 0;
2199
2200 class = ch->obj_class;
2201 if (!class)
2202 return -EINVAL;
2203
2204 if (gk20a_is_channel_marked_as_tsg(ch)) {
2205 tsg = &g->fifo.tsg[ch->tsgid];
2206 vm = tsg->vm;
2207 } else {
2208 vm = ch->vm;
2209 }
2210
2211 /* skip setting anything if both modes are already set */
2212 if (graphics_preempt_mode &&
2213 (graphics_preempt_mode == gr_ctx->graphics_preempt_mode))
2214 graphics_preempt_mode = 0;
2215
2216 if (compute_preempt_mode &&
2217 (compute_preempt_mode == gr_ctx->compute_preempt_mode))
2218 compute_preempt_mode = 0;
2219
2220 if (graphics_preempt_mode == 0 && compute_preempt_mode == 0)
2221 return 0;
2222
2223 if (g->ops.gr.set_ctxsw_preemption_mode) {
2224
2225 gk20a_dbg(gpu_dbg_sched, "chid=%d tsgid=%d pid=%d "
2226 "graphics_preempt=%d compute_preempt=%d",
2227 ch->chid,
2228 ch->tsgid,
2229 ch->tgid,
2230 graphics_preempt_mode,
2231 compute_preempt_mode);
2232 err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
2233 graphics_preempt_mode, compute_preempt_mode);
2234 if (err) {
2235 nvgpu_err(g, "set_ctxsw_preemption_mode failed");
2236 return err;
2237 }
2238 }
2239
2240 if (nvgpu_mem_begin(g, mem))
2241 return -ENOMEM;
2242
2243	if (nvgpu_mem_begin(g, ctxheader)) {
		err = -ENOMEM;
2244		goto unmap_ctx_header;
	}
2245
2246 err = gk20a_disable_channel_tsg(g, ch);
2247 if (err)
2248 goto unmap_ctx;
2249
2250 err = gk20a_fifo_preempt(g, ch);
2251 if (err)
2252 goto enable_ch;
2253
2254 if (g->ops.gr.update_ctxsw_preemption_mode) {
2255 g->ops.gr.update_ctxsw_preemption_mode(ch->g,
2256 ch_ctx, mem);
2257
2258 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
2259 if (err) {
2260 nvgpu_err(g, "can't map patch context");
2261 goto enable_ch;
2262 }
2263 g->ops.gr.commit_global_cb_manager(g, ch, true);
2264 gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
2265 }
2266
2267enable_ch:
2268 gk20a_enable_channel_tsg(g, ch);
2269unmap_ctx:
2270 nvgpu_mem_end(g, ctxheader);
2271unmap_ctx_header:
2272 nvgpu_mem_end(g, mem);
2273
2274 return err;
2275}
2276
2277int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
2278 struct nvgpu_preemption_modes_rec *preemption_modes_rec)
2279{
2280 preemption_modes_rec->graphics_preemption_mode_flags = (
2281 NVGPU_PREEMPTION_MODE_GRAPHICS_WFI |
2282 NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP);
2283 preemption_modes_rec->compute_preemption_mode_flags = (
2284 NVGPU_PREEMPTION_MODE_COMPUTE_WFI |
2285 NVGPU_PREEMPTION_MODE_COMPUTE_CTA |
2286 NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
2287
2288 preemption_modes_rec->default_graphics_preempt_mode =
2289 NVGPU_PREEMPTION_MODE_GRAPHICS_WFI;
2290 preemption_modes_rec->default_compute_preempt_mode =
2291 NVGPU_PREEMPTION_MODE_COMPUTE_WFI;
2292
2293 return 0;
2294}
2295
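/*
 * Program the GFXP WFI timeout (in sysclk cycles) and configure the FE
 * so a WFI is always injected on GFXP preemption.
 */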
2296int gr_gp10b_init_preemption_state(struct gk20a *g)
2297{
2298 u32 debug_2;
2299 struct gr_gk20a *gr = &g->gr;
2300	u32 sysclk_cycles = gr->gfxp_wfi_timeout_count;

2301	gk20a_writel(g, gr_fe_gfxp_wfi_timeout_r(),
2302 gr_fe_gfxp_wfi_timeout_count_f(sysclk_cycles));
2303
2304 debug_2 = gk20a_readl(g, gr_debug_2_r());
2305 debug_2 = set_field(debug_2,
2306 gr_debug_2_gfxp_wfi_always_injects_wfi_m(),
2307 gr_debug_2_gfxp_wfi_always_injects_wfi_enabled_f());
2308 gk20a_writel(g, gr_debug_2_r(), debug_2);
2309
2310 return 0;
2311}
2312
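/*
 * The preemption buffer pointer in the context image is stored as a
 * 256-byte-aligned address, hence the 8-bit shift of the GPU VA.
 */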
2313void gr_gp10b_set_preemption_buffer_va(struct gk20a *g,
2314 struct nvgpu_mem *mem, u64 gpu_va)
2315{
2316 u32 va = u64_lo32(gpu_va >> 8);
2317
2318 nvgpu_mem_wr(g, mem,
2319 ctxsw_prog_main_image_full_preemption_ptr_o(), va);
2321}
2322
2323void gr_gp10b_init_czf_bypass(struct gk20a *g)
2324{
2325 g->gr.czf_bypass = gr_gpc0_prop_debug1_czf_bypass_init_v();
2326}
2327
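/*
 * Apply the default CZF bypass value (captured at init) to a channel's
 * context through a GR_CTX register write op.
 */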
2328int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch)
2329{
2330 struct nvgpu_dbg_gpu_reg_op ops;
2331
2332 ops.op = REGOP(WRITE_32);
2333 ops.type = REGOP(TYPE_GR_CTX);
2334 ops.status = REGOP(STATUS_SUCCESS);
2335 ops.value_hi = 0;
2336 ops.and_n_mask_lo = gr_gpc0_prop_debug1_czf_bypass_m();
2337 ops.and_n_mask_hi = 0;
2338 ops.offset = gr_gpc0_prop_debug1_r();
2339 ops.value_lo = gr_gpc0_prop_debug1_czf_bypass_f(
2340 g->gr.czf_bypass);
2341
2342 return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false);
2343}
2344
2345void gr_gp10b_init_ctxsw_hdr_data(struct gk20a *g, struct nvgpu_mem *mem)
2346{
2347 gk20a_gr_init_ctxsw_hdr_data(g, mem);
2348
2349 nvgpu_mem_wr(g, mem,
2350 ctxsw_prog_main_image_num_wfi_save_ops_o(), 0);
2351 nvgpu_mem_wr(g, mem,
2352 ctxsw_prog_main_image_num_cta_save_ops_o(), 0);
2353 nvgpu_mem_wr(g, mem,
2354 ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0);
2355 nvgpu_mem_wr(g, mem,
2356 ctxsw_prog_main_image_num_cilp_save_ops_o(), 0);
2357}