Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/gr_gp10b.c')
-rw-r--r--	drivers/gpu/nvgpu/gp10b/gr_gp10b.c	2257
1 file changed, 2257 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
new file mode 100644
index 00000000..9de7d675
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -0,0 +1,2257 @@
1/*
2 * GP10B GPU GR
3 *
4 * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
17#include <linux/clk.h>
18#include <linux/delay.h>
19#include <linux/tegra-fuse.h>
20#include <linux/version.h>
21
22#include <dt-bindings/soc/gm20b-fuse.h>
23#include <dt-bindings/soc/gp10b-fuse.h>
24
25#include "gk20a/gr_gk20a.h"
26#include "gk20a/semaphore_gk20a.h"
27#include "gk20a/dbg_gpu_gk20a.h"
28
29#include "gm20b/gr_gm20b.h" /* for MAXWELL classes */
30#include "gp10b/gr_gp10b.h"
31#include "hw_gr_gp10b.h"
32#include "hw_fifo_gp10b.h"
33#include "hw_ctxsw_prog_gp10b.h"
34#include "hw_mc_gp10b.h"
35#include "gp10b_sysfs.h"
36#include <linux/vmalloc.h>
37
38#define NVGPU_GFXP_WFI_TIMEOUT_US 100LL
39
40static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
41{
42 bool valid = false;
43
44 switch (class_num) {
45 case PASCAL_COMPUTE_A:
46 case PASCAL_A:
47 case PASCAL_DMA_COPY_A:
48 valid = true;
49 break;
50
51 case MAXWELL_COMPUTE_B:
52 case MAXWELL_B:
53 case FERMI_TWOD_A:
54 case KEPLER_DMA_COPY_A:
55 case MAXWELL_DMA_COPY_A:
56 valid = true;
57 break;
58
59 default:
60 break;
61 }
62 gk20a_dbg_info("class=0x%x valid=%d", class_num, valid);
63 return valid;
64}
65
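/*
 * WAR for SM LRF ECC overcounting: subtract one count for each QRFDP
 * sub-partition that reported only the error type being adjusted
 * (single_err selects SBE vs. DBE), plus one for each sub-partition that
 * reported both types while the opposite counter stayed at zero.
 */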
66static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err,
67 u32 sed_status,
68 u32 ded_status,
69 u32 *count_to_adjust,
70 u32 opposite_count)
71{
72 u32 over_count = 0;
73
74 sed_status >>= gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_b();
75 ded_status >>= gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_b();
76
77	/* One overcount for each partition on which an SBE occurred but not a
78 DBE (or vice-versa) */
79 if (single_err) {
80 over_count =
81 hweight32(sed_status & ~ded_status);
82 } else {
83 over_count =
84 hweight32(ded_status & ~sed_status);
85 }
86
87	/* If both an SBE and a DBE occur on the same partition, then we have an
88 overcount for the subpartition if the opposite error counts are
89 zero. */
90 if ((sed_status & ded_status) && (opposite_count == 0)) {
91 over_count +=
92 hweight32(sed_status & ded_status);
93 }
94
95 if (*count_to_adjust > over_count)
96 *count_to_adjust -= over_count;
97 else
98 *count_to_adjust = 0;
99}
100
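/*
 * GP10B SM exception handler: run the common gk20a handler, then
 * accumulate the SM LRF single/double-bit ECC counts (adjusted by the
 * overcount WAR above) and the SM SHM SEC/SED/DED counts into the
 * per-TPC ecc_stats, clearing the HW counters and status afterwards.
 */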
101static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
102 bool *post_event, struct channel_gk20a *fault_ch,
103 u32 *hww_global_esr)
104{
105 int ret = 0;
106 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
107 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
108 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
109 u32 lrf_ecc_status, lrf_ecc_sed_status, lrf_ecc_ded_status;
110 u32 lrf_single_count_delta, lrf_double_count_delta;
111 u32 shm_ecc_status;
112
113 gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr);
114
115 /* Check for LRF ECC errors. */
116 lrf_ecc_status = gk20a_readl(g,
117 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset);
118 lrf_ecc_sed_status = lrf_ecc_status &
119 (gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f() |
120 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f() |
121 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f() |
122 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f());
123 lrf_ecc_ded_status = lrf_ecc_status &
124 (gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f() |
125 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f() |
126 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f() |
127 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f());
128 lrf_single_count_delta =
129 gk20a_readl(g,
130 gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() +
131 offset);
132 lrf_double_count_delta =
133 gk20a_readl(g,
134 gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() +
135 offset);
136 gk20a_writel(g,
137 gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset,
138 0);
139 gk20a_writel(g,
140 gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset,
141 0);
142 if (lrf_ecc_sed_status) {
143 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
144 "Single bit error detected in SM LRF!");
145
146 gr_gp10b_sm_lrf_ecc_overcount_war(1,
147 lrf_ecc_sed_status,
148 lrf_ecc_ded_status,
149 &lrf_single_count_delta,
150 lrf_double_count_delta);
151 g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters[tpc] +=
152 lrf_single_count_delta;
153 }
154 if (lrf_ecc_ded_status) {
155 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
156 "Double bit error detected in SM LRF!");
157
158 gr_gp10b_sm_lrf_ecc_overcount_war(0,
159 lrf_ecc_sed_status,
160 lrf_ecc_ded_status,
161 &lrf_double_count_delta,
162 lrf_single_count_delta);
163 g->gr.t18x.ecc_stats.sm_lrf_double_err_count.counters[tpc] +=
164 lrf_double_count_delta;
165 }
166 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
167 lrf_ecc_status);
168
169 /* Check for SHM ECC errors. */
170 shm_ecc_status = gk20a_readl(g,
171 gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset);
172 if ((shm_ecc_status &
173 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f()) ||
174 (shm_ecc_status &
175 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm1_pending_f()) ||
176 (shm_ecc_status &
177 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) ||
178 (shm_ecc_status &
179 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f()) ) {
180 u32 ecc_stats_reg_val;
181
182 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
183 "Single bit error detected in SM SHM!");
184
185 ecc_stats_reg_val =
186 gk20a_readl(g,
187 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
188 g->gr.t18x.ecc_stats.sm_shm_sec_count.counters[tpc] +=
189 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val);
190 g->gr.t18x.ecc_stats.sm_shm_sed_count.counters[tpc] +=
191 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val);
192 ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() |
193 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m());
194 gk20a_writel(g,
195 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
196 ecc_stats_reg_val);
197 }
198 if ( (shm_ecc_status &
199 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) ||
200 (shm_ecc_status &
201 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f()) ) {
202 u32 ecc_stats_reg_val;
203
204 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
205 "Double bit error detected in SM SHM!");
206
207 ecc_stats_reg_val =
208 gk20a_readl(g,
209 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
210 g->gr.t18x.ecc_stats.sm_shm_ded_count.counters[tpc] +=
211 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val);
212 ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m());
213 gk20a_writel(g,
214 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
215 ecc_stats_reg_val);
216 }
217 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset,
218 shm_ecc_status);
219
220
221 return ret;
222}
223
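/*
 * TEX ECC counters are banked per pipe: the routing register selects
 * pipe0 or pipe1, the total/unique counts are accumulated into ecc_stats
 * and cleared for each pipe, and the routing is restored to default
 * before the ESR is acknowledged.
 */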
224static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
225 bool *post_event)
226{
227 int ret = 0;
228 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
229 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
230 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
231 u32 esr;
232 u32 ecc_stats_reg_val;
233
234 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
235
236 esr = gk20a_readl(g,
237 gr_gpc0_tpc0_tex_m_hww_esr_r() + offset);
238 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "0x%08x", esr);
239
240 if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f()) {
241 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
242 "Single bit error detected in TEX!");
243
244 /* Pipe 0 counters */
245 gk20a_writel(g,
246 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
247 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f());
248
249 ecc_stats_reg_val = gk20a_readl(g,
250 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
251 g->gr.t18x.ecc_stats.tex_total_sec_pipe0_count.counters[tpc] +=
252 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
253 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
254 gk20a_writel(g,
255 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
256 ecc_stats_reg_val);
257
258 ecc_stats_reg_val = gk20a_readl(g,
259 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
260 g->gr.t18x.ecc_stats.tex_unique_sec_pipe0_count.counters[tpc] +=
261 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
262 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
263 gk20a_writel(g,
264 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
265 ecc_stats_reg_val);
266
267
268 /* Pipe 1 counters */
269 gk20a_writel(g,
270 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
271 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f());
272
273 ecc_stats_reg_val = gk20a_readl(g,
274 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
275 g->gr.t18x.ecc_stats.tex_total_sec_pipe1_count.counters[tpc] +=
276 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
277 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
278 gk20a_writel(g,
279 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
280 ecc_stats_reg_val);
281
282 ecc_stats_reg_val = gk20a_readl(g,
283 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
284 g->gr.t18x.ecc_stats.tex_unique_sec_pipe1_count.counters[tpc] +=
285 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
286 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
287 gk20a_writel(g,
288 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
289 ecc_stats_reg_val);
290
291
292 gk20a_writel(g,
293 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
294 gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f());
295 }
296 if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f()) {
297 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
298 "Double bit error detected in TEX!");
299
300 /* Pipe 0 counters */
301 gk20a_writel(g,
302 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
303 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f());
304
305 ecc_stats_reg_val = gk20a_readl(g,
306 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
307 g->gr.t18x.ecc_stats.tex_total_ded_pipe0_count.counters[tpc] +=
308 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
309 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
310 gk20a_writel(g,
311 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
312 ecc_stats_reg_val);
313
314 ecc_stats_reg_val = gk20a_readl(g,
315 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
316 g->gr.t18x.ecc_stats.tex_unique_ded_pipe0_count.counters[tpc] +=
317 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
318 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
319 gk20a_writel(g,
320 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
321 ecc_stats_reg_val);
322
323
324 /* Pipe 1 counters */
325 gk20a_writel(g,
326 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
327 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f());
328
329 ecc_stats_reg_val = gk20a_readl(g,
330 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
331 g->gr.t18x.ecc_stats.tex_total_ded_pipe1_count.counters[tpc] +=
332 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
333 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
334 gk20a_writel(g,
335 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
336 ecc_stats_reg_val);
337
338 ecc_stats_reg_val = gk20a_readl(g,
339 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
340 g->gr.t18x.ecc_stats.tex_unique_ded_pipe1_count.counters[tpc] +=
341 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
342 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
343 gk20a_writel(g,
344 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
345 ecc_stats_reg_val);
346
347
348 gk20a_writel(g,
349 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
350 gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f());
351 }
352
353 gk20a_writel(g,
354 gr_gpc0_tpc0_tex_m_hww_esr_r() + offset,
355 esr | gr_gpc0_tpc0_tex_m_hww_esr_reset_active_f());
356
357 return ret;
358}
359
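/*
 * Patch the per-PPC beta/alpha circular buffer sizes and offsets into the
 * channel context. With GfxP preemption the beta CB and its cache init
 * value grow by the difference between the GfxP and default CB sizes,
 * while the steady-state size stays at the default.
 */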
360static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
361 struct channel_gk20a *c, bool patch)
362{
363 struct gr_gk20a *gr = &g->gr;
364 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
365 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
366 u32 attrib_offset_in_chunk = 0;
367 u32 alpha_offset_in_chunk = 0;
368 u32 pd_ab_max_output;
369 u32 gpc_index, ppc_index;
370 u32 temp, temp2;
371 u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate;
372 u32 attrib_size_in_chunk, cb_attrib_cache_size_init;
373 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
374 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
375 u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
376
377 gk20a_dbg_fn("");
378
379 if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) {
380 attrib_size_in_chunk = gr->attrib_cb_default_size +
381 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
382 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
383 cb_attrib_cache_size_init = gr->attrib_cb_default_size +
384 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
385 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
386 } else {
387 attrib_size_in_chunk = gr->attrib_cb_size;
388 cb_attrib_cache_size_init = gr->attrib_cb_default_size;
389 }
390
391 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(),
392 gr->attrib_cb_default_size, patch);
393 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_alpha_r(),
394 gr->alpha_cb_default_size, patch);
395
396 pd_ab_max_output = (gr->alpha_cb_default_size *
397 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
398 gr_pd_ab_dist_cfg1_max_output_granularity_v();
399
400 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
401 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
402 gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
403
404 attrib_offset_in_chunk = alpha_offset_in_chunk +
405 gr->tpc_count * gr->alpha_cb_size;
406
407 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
408 temp = gpc_stride * gpc_index;
409 temp2 = num_pes_per_gpc * gpc_index;
410 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
411 ppc_index++) {
412 cbm_cfg_size_beta = cb_attrib_cache_size_init *
413 gr->pes_tpc_count[ppc_index][gpc_index];
414 cbm_cfg_size_alpha = gr->alpha_cb_default_size *
415 gr->pes_tpc_count[ppc_index][gpc_index];
416 cbm_cfg_size_steadystate = gr->attrib_cb_default_size *
417 gr->pes_tpc_count[ppc_index][gpc_index];
418
419 gr_gk20a_ctx_patch_write(g, ch_ctx,
420 gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
421 ppc_in_gpc_stride * ppc_index,
422 cbm_cfg_size_beta, patch);
423
424 gr_gk20a_ctx_patch_write(g, ch_ctx,
425 gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
426 ppc_in_gpc_stride * ppc_index,
427 attrib_offset_in_chunk, patch);
428
429 gr_gk20a_ctx_patch_write(g, ch_ctx,
430 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
431 ppc_in_gpc_stride * ppc_index,
432 cbm_cfg_size_steadystate,
433 patch);
434
435 attrib_offset_in_chunk += attrib_size_in_chunk *
436 gr->pes_tpc_count[ppc_index][gpc_index];
437
438 gr_gk20a_ctx_patch_write(g, ch_ctx,
439 gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
440 ppc_in_gpc_stride * ppc_index,
441 cbm_cfg_size_alpha, patch);
442
443 gr_gk20a_ctx_patch_write(g, ch_ctx,
444 gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
445 ppc_in_gpc_stride * ppc_index,
446 alpha_offset_in_chunk, patch);
447
448 alpha_offset_in_chunk += gr->alpha_cb_size *
449 gr->pes_tpc_count[ppc_index][gpc_index];
450
451 gr_gk20a_ctx_patch_write(g, ch_ctx,
452 gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
453 gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate),
454 patch);
455 }
456 }
457
458 return 0;
459}
460
461static void gr_gp10b_commit_global_pagepool(struct gk20a *g,
462 struct channel_ctx_gk20a *ch_ctx,
463 u64 addr, u32 size, bool patch)
464{
465 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(),
466 gr_scc_pagepool_base_addr_39_8_f(addr), patch);
467
468 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(),
469 gr_scc_pagepool_total_pages_f(size) |
470 gr_scc_pagepool_valid_true_f(), patch);
471
472 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(),
473 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
474
475 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(),
476 gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
477}
478
479static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
480 struct zbc_entry *color_val, u32 index)
481{
482 u32 i;
483 u32 zbc_c;
484
485 /* update l2 table */
486 g->ops.ltc.set_zbc_color_entry(g, color_val, index);
487
488 /* update ds table */
489 gk20a_writel(g, gr_ds_zbc_color_r_r(),
490 gr_ds_zbc_color_r_val_f(color_val->color_ds[0]));
491 gk20a_writel(g, gr_ds_zbc_color_g_r(),
492 gr_ds_zbc_color_g_val_f(color_val->color_ds[1]));
493 gk20a_writel(g, gr_ds_zbc_color_b_r(),
494 gr_ds_zbc_color_b_val_f(color_val->color_ds[2]));
495 gk20a_writel(g, gr_ds_zbc_color_a_r(),
496 gr_ds_zbc_color_a_val_f(color_val->color_ds[3]));
497
498 gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
499 gr_ds_zbc_color_fmt_val_f(color_val->format));
500
501 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
502 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
503
504 /* trigger the write */
505 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
506 gr_ds_zbc_tbl_ld_select_c_f() |
507 gr_ds_zbc_tbl_ld_action_write_f() |
508 gr_ds_zbc_tbl_ld_trigger_active_f());
509
510 /* update local copy */
511 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
512 gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i];
513 gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i];
514 }
515 gr->zbc_col_tbl[index].format = color_val->format;
516 gr->zbc_col_tbl[index].ref_cnt++;
517
518 gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_r_r(index),
519 color_val->color_ds[0]);
520 gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_g_r(index),
521 color_val->color_ds[1]);
522 gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_b_r(index),
523 color_val->color_ds[2]);
524 gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_a_r(index),
525 color_val->color_ds[3]);
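	/* Each SWDX ZBC color format register packs four 7-bit format
	 * fields, one per table entry; update only this entry's field. */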
526 zbc_c = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3));
527 zbc_c &= ~(0x7f << ((index % 4) * 7));
528 zbc_c |= color_val->format << ((index % 4) * 7);
529 gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3), zbc_c);
530
531 return 0;
532}
533
534static int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
535 struct zbc_entry *depth_val, u32 index)
536{
537 u32 zbc_z;
538
539 /* update l2 table */
540 g->ops.ltc.set_zbc_depth_entry(g, depth_val, index);
541
542 /* update ds table */
543 gk20a_writel(g, gr_ds_zbc_z_r(),
544 gr_ds_zbc_z_val_f(depth_val->depth));
545
546 gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
547 gr_ds_zbc_z_fmt_val_f(depth_val->format));
548
549 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
550 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
551
552 /* trigger the write */
553 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
554 gr_ds_zbc_tbl_ld_select_z_f() |
555 gr_ds_zbc_tbl_ld_action_write_f() |
556 gr_ds_zbc_tbl_ld_trigger_active_f());
557
558 /* update local copy */
559 gr->zbc_dep_tbl[index].depth = depth_val->depth;
560 gr->zbc_dep_tbl[index].format = depth_val->format;
561 gr->zbc_dep_tbl[index].ref_cnt++;
562
563 gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_r(index), depth_val->depth);
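	/* As with color: four 7-bit format fields per SWDX ZBC depth format
	 * register, so only this entry's field is rewritten. */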
564 zbc_z = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index & ~3));
565 zbc_z &= ~(0x7f << (index % 4) * 7);
566 zbc_z |= depth_val->format << (index % 4) * 7;
567 gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index & ~3), zbc_z);
568
569 return 0;
570}
571
572static u32 gr_gp10b_pagepool_default_size(struct gk20a *g)
573{
574 return gr_scc_pagepool_total_pages_hwmax_value_v();
575}
576
577static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
578{
579 struct gr_gk20a *gr = &g->gr;
580 int size;
581
582 gr->attrib_cb_size = gr->attrib_cb_default_size;
583 gr->alpha_cb_size = gr->alpha_cb_default_size;
584
585 gr->attrib_cb_size = min(gr->attrib_cb_size,
586 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~0) / g->gr.tpc_count);
587 gr->alpha_cb_size = min(gr->alpha_cb_size,
588 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~0) / g->gr.tpc_count);
589
590 size = gr->attrib_cb_size *
591 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
592 gr->max_tpc_count;
593
594 size += gr->alpha_cb_size *
595 gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
596 gr->max_tpc_count;
597
598 size = ALIGN(size, 128);
599
600 return size;
601}
602
603static void gr_gp10b_set_go_idle_timeout(struct gk20a *g, u32 data)
604{
605 gk20a_writel(g, gr_fe_go_idle_timeout_r(), data);
606}
607
608static void gr_gp10b_set_coalesce_buffer_size(struct gk20a *g, u32 data)
609{
610 u32 val;
611
612 gk20a_dbg_fn("");
613
614 val = gk20a_readl(g, gr_gpcs_tc_debug0_r());
615 val = set_field(val, gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(),
616 gr_gpcs_tc_debug0_limit_coalesce_buffer_size_f(data));
617 gk20a_writel(g, gr_gpcs_tc_debug0_r(), val);
618
619 gk20a_dbg_fn("done");
620}
621
622static int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
623 u32 class_num, u32 offset, u32 data)
624{
625 gk20a_dbg_fn("");
626
627 if (class_num == PASCAL_COMPUTE_A) {
628 switch (offset << 2) {
629 case NVC0C0_SET_SHADER_EXCEPTIONS:
630 gk20a_gr_set_shader_exceptions(g, data);
631 break;
632 default:
633 goto fail;
634 }
635 }
636
637 if (class_num == PASCAL_A) {
638 switch (offset << 2) {
639 case NVC097_SET_SHADER_EXCEPTIONS:
640 gk20a_gr_set_shader_exceptions(g, data);
641 break;
642 case NVC097_SET_CIRCULAR_BUFFER_SIZE:
643 g->ops.gr.set_circular_buffer_size(g, data);
644 break;
645 case NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
646 g->ops.gr.set_alpha_circular_buffer_size(g, data);
647 break;
648 case NVC097_SET_GO_IDLE_TIMEOUT:
649 gr_gp10b_set_go_idle_timeout(g, data);
650 break;
651 case NVC097_SET_COALESCE_BUFFER_SIZE:
652 gr_gp10b_set_coalesce_buffer_size(g, data);
653 break;
654 default:
655 goto fail;
656 }
657 }
658 return 0;
659
660fail:
661 return -EINVAL;
662}
663
664static void gr_gp10b_cb_size_default(struct gk20a *g)
665{
666 struct gr_gk20a *gr = &g->gr;
667
668 if (!gr->attrib_cb_default_size)
669 gr->attrib_cb_default_size = 0x800;
670 gr->alpha_cb_default_size =
671 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
672}
673
674static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
675{
676 struct gr_gk20a *gr = &g->gr;
677 u32 gpc_index, ppc_index, stride, val;
678 u32 pd_ab_max_output;
679 u32 alpha_cb_size = data * 4;
680 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
681 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
682
683 gk20a_dbg_fn("");
684
685 if (alpha_cb_size > gr->alpha_cb_size)
686 alpha_cb_size = gr->alpha_cb_size;
687
688 gk20a_writel(g, gr_ds_tga_constraintlogic_alpha_r(),
689 (gk20a_readl(g, gr_ds_tga_constraintlogic_alpha_r()) &
690 ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
691 gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));
692
693 pd_ab_max_output = alpha_cb_size *
694 gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() /
695 gr_pd_ab_dist_cfg1_max_output_granularity_v();
696
697 gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
698 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
699 gr_pd_ab_dist_cfg1_max_batches_init_f());
700
701 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
702 stride = gpc_stride * gpc_index;
703
704 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
705 ppc_index++) {
706
707 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
708 stride +
709 ppc_in_gpc_stride * ppc_index);
710
711 val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
712 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
713 gr->pes_tpc_count[ppc_index][gpc_index]));
714
715 gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
716 stride +
717 ppc_in_gpc_stride * ppc_index, val);
718 }
719 }
720}
721
722static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
723{
724 struct gr_gk20a *gr = &g->gr;
725 u32 gpc_index, ppc_index, stride, val;
726 u32 cb_size_steady = data * 4, cb_size;
727 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
728 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
729
730 gk20a_dbg_fn("");
731
732 if (cb_size_steady > gr->attrib_cb_size)
733 cb_size_steady = gr->attrib_cb_size;
734 if (gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) !=
735 gk20a_readl(g,
736 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r())) {
737 cb_size = cb_size_steady +
738 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
739 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
740 } else {
741 cb_size = cb_size_steady;
742 }
743
744 gk20a_writel(g, gr_ds_tga_constraintlogic_beta_r(),
745 (gk20a_readl(g, gr_ds_tga_constraintlogic_beta_r()) &
746 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
747 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady));
748
749 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
750 stride = gpc_stride * gpc_index;
751
752 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
753 ppc_index++) {
754
755 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
756 stride +
757 ppc_in_gpc_stride * ppc_index);
758
759 val = set_field(val,
760 gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
761 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
762 gr->pes_tpc_count[ppc_index][gpc_index]));
763
764 gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
765 stride +
766 ppc_in_gpc_stride * ppc_index, val);
767
768 gk20a_writel(g, ppc_in_gpc_stride * ppc_index +
769 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() +
770 stride,
771 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_f(
772 cb_size_steady));
773
774 val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
775 ppc_index + gpc_index));
776
777 val = set_field(val,
778 gr_gpcs_swdx_tc_beta_cb_size_v_m(),
779 gr_gpcs_swdx_tc_beta_cb_size_v_f(
780 cb_size_steady *
781 gr->gpc_ppc_count[gpc_index]));
782
783 gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
784 ppc_index + gpc_index), val);
785 }
786 }
787}
788
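/*
 * On top of the common gk20a context state init, query the preemption
 * ctxsw image size from FECS (discover_preemption_image_size method) so
 * that the GfxP buffers can be sized later.
 */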
789static int gr_gp10b_init_ctx_state(struct gk20a *g)
790{
791 struct fecs_method_op_gk20a op = {
792 .mailbox = { .id = 0, .data = 0,
793 .clr = ~0, .ok = 0, .fail = 0},
794 .method.data = 0,
795 .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
796 .cond.fail = GR_IS_UCODE_OP_SKIP,
797 };
798 int err;
799
800 gk20a_dbg_fn("");
801
802 err = gr_gk20a_init_ctx_state(g);
803 if (err)
804 return err;
805
806 if (!g->gr.t18x.ctx_vars.preempt_image_size) {
807 op.method.addr =
808 gr_fecs_method_push_adr_discover_preemption_image_size_v();
809 op.mailbox.ret = &g->gr.t18x.ctx_vars.preempt_image_size;
810 err = gr_gk20a_submit_fecs_method_op(g, op, false);
811 if (err) {
812 gk20a_err(dev_from_gk20a(g),
813 "query preempt image size failed");
814 return err;
815 }
816 }
817
818 gk20a_dbg_info("preempt image size: %u",
819 g->gr.t18x.ctx_vars.preempt_image_size);
820
821 gk20a_dbg_fn("done");
822
823 return 0;
824}
825
826int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
827 struct mem_desc *mem)
828{
829 int err;
830
831 gk20a_dbg_fn("");
832
833 err = gk20a_gmmu_alloc_sys(vm->mm->g, size, mem);
834 if (err)
835 return err;
836
837 mem->gpu_va = gk20a_gmmu_map(vm,
838 &mem->sgt,
839 size,
840 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
841 gk20a_mem_flag_none,
842 false,
843 mem->aperture);
844
845 if (!mem->gpu_va) {
846 err = -ENOMEM;
847 goto fail_free;
848 }
849
850 return 0;
851
852fail_free:
853 gk20a_gmmu_free(vm->mm->g, mem);
854 return err;
855}
856
857static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
858 struct gr_ctx_desc *gr_ctx,
859 struct vm_gk20a *vm, u32 class,
860 u32 graphics_preempt_mode,
861 u32 compute_preempt_mode)
862{
863 int err = 0;
864
865 if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
866 graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
867
868 if (class == PASCAL_COMPUTE_A &&
869 g->gr.t18x.ctx_vars.force_preemption_cilp)
870 compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
871
872 /* check for invalid combinations */
873 if ((graphics_preempt_mode == 0) && (compute_preempt_mode == 0))
874 return -EINVAL;
875
876 if ((graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) &&
877 (compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP))
878 return -EINVAL;
879
880 /* Do not allow lower preemption modes than current ones */
881 if (graphics_preempt_mode &&
882 (graphics_preempt_mode < gr_ctx->graphics_preempt_mode))
883 return -EINVAL;
884
885 if (compute_preempt_mode &&
886 (compute_preempt_mode < gr_ctx->compute_preempt_mode))
887 return -EINVAL;
888
889 /* set preemption modes */
890 switch (graphics_preempt_mode) {
891 case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP:
892 {
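		/*
		 * GfxP needs four per-context buffers: the FECS preemption
		 * ctxsw image, the RM spill buffer, the beta CB backing store
		 * and the page pool; their sizes are derived from the HW
		 * defaults computed below.
		 */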
893 u32 spill_size =
894 gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
895 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
896 u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
897 gr_scc_pagepool_total_pages_byte_granularity_v();
898 u32 betacb_size = g->gr.attrib_cb_default_size +
899 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
900 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
901 u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
902 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
903 g->gr.max_tpc_count;
904 attrib_cb_size = ALIGN(attrib_cb_size, 128);
905
906 gk20a_dbg_info("gfxp context spill_size=%d", spill_size);
907 gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size);
908 gk20a_dbg_info("gfxp context attrib_cb_size=%d",
909 attrib_cb_size);
910
911 err = gr_gp10b_alloc_buffer(vm,
912 g->gr.t18x.ctx_vars.preempt_image_size,
913 &gr_ctx->t18x.preempt_ctxsw_buffer);
914 if (err) {
915 gk20a_err(dev_from_gk20a(g),
916 "cannot allocate preempt buffer");
917 goto fail;
918 }
919
920 err = gr_gp10b_alloc_buffer(vm,
921 spill_size,
922 &gr_ctx->t18x.spill_ctxsw_buffer);
923 if (err) {
924 gk20a_err(dev_from_gk20a(g),
925 "cannot allocate spill buffer");
926 goto fail_free_preempt;
927 }
928
929 err = gr_gp10b_alloc_buffer(vm,
930 attrib_cb_size,
931 &gr_ctx->t18x.betacb_ctxsw_buffer);
932 if (err) {
933 gk20a_err(dev_from_gk20a(g),
934 "cannot allocate beta buffer");
935 goto fail_free_spill;
936 }
937
938 err = gr_gp10b_alloc_buffer(vm,
939 pagepool_size,
940 &gr_ctx->t18x.pagepool_ctxsw_buffer);
941 if (err) {
942 gk20a_err(dev_from_gk20a(g),
943 "cannot allocate page pool");
944 goto fail_free_betacb;
945 }
946
947 gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
948 break;
949 }
950
951 case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI:
952 gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
953 break;
954
955 default:
956 break;
957 }
958
959 if (class == PASCAL_COMPUTE_A) {
960 switch (compute_preempt_mode) {
961 case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
962 case NVGPU_COMPUTE_PREEMPTION_MODE_CTA:
963 case NVGPU_COMPUTE_PREEMPTION_MODE_CILP:
964 gr_ctx->compute_preempt_mode = compute_preempt_mode;
965 break;
966 default:
967 break;
968 }
969 }
970
971 return 0;
972
973fail_free_betacb:
974 gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
975fail_free_spill:
976 gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
977fail_free_preempt:
978 gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
979fail:
980 return err;
981}
982
983static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
984 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
985 u32 class,
986 u32 flags)
987{
988 int err;
989 u32 graphics_preempt_mode = 0;
990 u32 compute_preempt_mode = 0;
991
992 gk20a_dbg_fn("");
993
994 err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags);
995 if (err)
996 return err;
997
998 (*gr_ctx)->t18x.ctx_id_valid = false;
999
1000 if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP)
1001 graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
1002 if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP)
1003 compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
1004
1005 if (graphics_preempt_mode || compute_preempt_mode) {
1006 if (g->ops.gr.set_ctxsw_preemption_mode) {
1007 err = g->ops.gr.set_ctxsw_preemption_mode(g, *gr_ctx, vm,
1008 class, graphics_preempt_mode, compute_preempt_mode);
1009 if (err) {
1010 gk20a_err(dev_from_gk20a(g),
1011 "set_ctxsw_preemption_mode failed");
1012 goto fail_free_gk20a_ctx;
1013 }
1014		} else {
1015			/* preemption mode requested but no hook to set it */
			err = -EINVAL;
			goto fail_free_gk20a_ctx;
		}
1016 }
1017
1018 gk20a_dbg_fn("done");
1019
1020 return 0;
1021
1022fail_free_gk20a_ctx:
1023 gr_gk20a_free_gr_ctx(g, vm, *gr_ctx);
1024 *gr_ctx = NULL;
1025
1026 return err;
1027}
1028
1029static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
1030 struct gr_ctx_desc *gr_ctx)
1031{
1032 struct mem_desc *mem = &gr_ctx->mem;
1033
1034 if (gk20a_mem_begin(g, mem)) {
1035		WARN(1, "Cannot map context");
1036 return;
1037 }
1038 gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_magic_value_o : %x (expect %x)\n",
1039 gk20a_mem_rd(g, mem,
1040 ctxsw_prog_main_image_magic_value_o()),
1041 ctxsw_prog_main_image_magic_value_v_value_v());
1042
1043 gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x\n",
1044 gk20a_mem_rd(g, mem,
1045 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o()));
1046
1047 gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x\n",
1048 gk20a_mem_rd(g, mem,
1049 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o()));
1050
1051 gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_control : %x\n",
1052 gk20a_mem_rd(g, mem,
1053 ctxsw_prog_main_image_context_timestamp_buffer_control_o()));
1054
1055 gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n",
1056 gk20a_mem_rd(g, mem,
1057 ctxsw_prog_main_image_num_save_ops_o()));
1058 gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n",
1059 gk20a_mem_rd(g, mem,
1060 ctxsw_prog_main_image_num_wfi_save_ops_o()));
1061 gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n",
1062 gk20a_mem_rd(g, mem,
1063 ctxsw_prog_main_image_num_cta_save_ops_o()));
1064 gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n",
1065 gk20a_mem_rd(g, mem,
1066 ctxsw_prog_main_image_num_gfxp_save_ops_o()));
1067 gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n",
1068 gk20a_mem_rd(g, mem,
1069 ctxsw_prog_main_image_num_cilp_save_ops_o()));
1070 gk20a_err(dev_from_gk20a(g),
1071 "image gfx preemption option (GFXP is 1) %x\n",
1072 gk20a_mem_rd(g, mem,
1073 ctxsw_prog_main_image_graphics_preemption_options_o()));
1074 gk20a_mem_end(g, mem);
1075}
1076
1077static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
1078 struct gr_ctx_desc *gr_ctx)
1079{
1080 gk20a_dbg_fn("");
1081
1082 if (!gr_ctx)
1083 return;
1084
1085 if (g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close)
1086 dump_ctx_switch_stats(g, vm, gr_ctx);
1087
1088 gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
1089 gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
1090 gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
1091 gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
1092 gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
1093 gk20a_dbg_fn("done");
1094}
1095
1096
1097static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
1098 struct channel_ctx_gk20a *ch_ctx,
1099 struct mem_desc *mem)
1100{
1101 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
1102 u32 gfxp_preempt_option =
1103 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
1104 u32 cilp_preempt_option =
1105 ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
1106 u32 cta_preempt_option =
1107 ctxsw_prog_main_image_compute_preemption_options_control_cta_f();
1108 int err;
1109
1110 gk20a_dbg_fn("");
1111
1112 if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) {
1113 gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
1114 gk20a_mem_wr(g, mem,
1115 ctxsw_prog_main_image_graphics_preemption_options_o(),
1116 gfxp_preempt_option);
1117 }
1118
1119 if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) {
1120 gk20a_dbg_info("CILP: %x", cilp_preempt_option);
1121 gk20a_mem_wr(g, mem,
1122 ctxsw_prog_main_image_compute_preemption_options_o(),
1123 cilp_preempt_option);
1124 }
1125
1126 if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) {
1127 gk20a_dbg_info("CTA: %x", cta_preempt_option);
1128 gk20a_mem_wr(g, mem,
1129 ctxsw_prog_main_image_compute_preemption_options_o(),
1130 cta_preempt_option);
1131 }
1132
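	/*
	 * If this context owns a GfxP preemption buffer, point the ctxsw
	 * image at it and patch the per-context beta CB, page pool and spill
	 * buffer addresses (programmed right-shifted by their HW alignment)
	 * along with the CBES reserve values.
	 */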
1133 if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) {
1134 u32 addr;
1135 u32 size;
1136 u32 cbes_reserve;
1137
1138 gk20a_mem_wr(g, mem,
1139 ctxsw_prog_main_image_full_preemption_ptr_o(),
1140 gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
1141
1142 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
1143 if (err) {
1144 gk20a_err(dev_from_gk20a(g),
1145 "can't map patch context");
1146 goto out;
1147 }
1148
1149 addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >>
1150 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
1151 (u64_hi32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) <<
1152 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
1153
1154 gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
1155 g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
1156
1157 addr = (u64_lo32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) >>
1158 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
1159 (u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
1160 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
1161 size = gr_ctx->t18x.pagepool_ctxsw_buffer.size;
1162
1163 if (size == g->ops.gr.pagepool_default_size(g))
1164 size = gr_scc_pagepool_total_pages_hwmax_v();
1165
1166 g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
1167
1168 addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >>
1169 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
1170 (u64_hi32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) <<
1171 (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
1172 size = gr_ctx->t18x.spill_ctxsw_buffer.size /
1173 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
1174
1175 gr_gk20a_ctx_patch_write(g, ch_ctx,
1176 gr_gpc0_swdx_rm_spill_buffer_addr_r(),
1177 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
1178 true);
1179 gr_gk20a_ctx_patch_write(g, ch_ctx,
1180 gr_gpc0_swdx_rm_spill_buffer_size_r(),
1181 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
1182 true);
1183
1184 cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
1185 gr_gk20a_ctx_patch_write(g, ch_ctx,
1186 gr_gpcs_swdx_beta_cb_ctrl_r(),
1187 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
1188 cbes_reserve),
1189 true);
1190 gr_gk20a_ctx_patch_write(g, ch_ctx,
1191 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
1192 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
1193 cbes_reserve),
1194 true);
1195
1196 gr_gk20a_ctx_patch_write_end(g, ch_ctx);
1197 }
1198
1199out:
1200 gk20a_dbg_fn("done");
1201}
1202
1203static int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
1204 struct gk20a_debug_output *o)
1205{
1206 struct gr_gk20a *gr = &g->gr;
1207 u32 gr_engine_id;
1208
1209 gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
1210
1211 gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n",
1212 gk20a_readl(g, gr_status_r()));
1213 gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n",
1214 gk20a_readl(g, gr_status_1_r()));
1215 gk20a_debug_output(o, "NV_PGRAPH_STATUS2: 0x%x\n",
1216 gk20a_readl(g, gr_status_2_r()));
1217 gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n",
1218 gk20a_readl(g, gr_engine_status_r()));
1219 gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n",
1220 gk20a_readl(g, gr_gpfifo_status_r()));
1221 gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n",
1222 gk20a_readl(g, gr_gpfifo_ctl_r()));
1223 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n",
1224 gk20a_readl(g, gr_fecs_host_int_status_r()));
1225 gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION : 0x%x\n",
1226 gk20a_readl(g, gr_exception_r()));
1227 gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n",
1228 gk20a_readl(g, gr_fecs_intr_r()));
1229 gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n",
1230 gk20a_readl(g, fifo_engine_status_r(gr_engine_id)));
1231 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n",
1232 gk20a_readl(g, gr_activity_0_r()));
1233 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n",
1234 gk20a_readl(g, gr_activity_1_r()));
1235 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY2: 0x%x\n",
1236 gk20a_readl(g, gr_activity_2_r()));
1237 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n",
1238 gk20a_readl(g, gr_activity_4_r()));
1239 gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n",
1240 gk20a_readl(g, gr_pri_sked_activity_r()));
1241 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n",
1242 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r()));
1243 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n",
1244 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r()));
1245 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n",
1246 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r()));
1247 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n",
1248 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
1249 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1250 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
1251 if (gr->gpc_tpc_count[0] == 2)
1252 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1253 gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
1254 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1255 gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r()));
1256 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n",
1257 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r()));
1258 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n",
1259 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r()));
1260 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n",
1261 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r()));
1262 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n",
1263 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
1264 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1265 gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
1266 if (gr->gpc_tpc_count[0] == 2)
1267 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1268 gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
1269 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1270 gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r()));
1271 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n",
1272 gk20a_readl(g, gr_pri_be0_becs_be_activity0_r()));
1273 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n",
1274 gk20a_readl(g, gr_pri_be1_becs_be_activity0_r()));
1275 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n",
1276 gk20a_readl(g, gr_pri_bes_becs_be_activity0_r()));
1277 gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n",
1278 gk20a_readl(g, gr_pri_ds_mpipe_status_r()));
1279 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n",
1280 gk20a_readl(g, gr_fe_go_idle_timeout_r()));
1281 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n",
1282 gk20a_readl(g, gr_pri_fe_go_idle_info_r()));
1283 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n",
1284 gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r()));
1285 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n",
1286 gk20a_readl(g, gr_cwd_fs_r()));
1287 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS: 0x%x\n",
1288 gk20a_readl(g, gr_fe_tpc_fs_r()));
1289 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID(0): 0x%x\n",
1290 gk20a_readl(g, gr_cwd_gpc_tpc_id_r(0)));
1291 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n",
1292 gk20a_readl(g, gr_cwd_sm_id_r(0)));
1293 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n",
1294 gk20a_readl(g, gr_fecs_ctxsw_status_fe_0_r()));
1295 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n",
1296 gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
1297 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n",
1298 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r()));
1299 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n",
1300 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r()));
1301 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n",
1302 gk20a_readl(g, gr_fecs_ctxsw_idlestate_r()));
1303 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n",
1304 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r()));
1305 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n",
1306 gk20a_readl(g, gr_fecs_current_ctx_r()));
1307 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n",
1308 gk20a_readl(g, gr_fecs_new_ctx_r()));
1309 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n",
1310 gk20a_readl(g, gr_pri_be0_crop_status1_r()));
1311 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n",
1312 gk20a_readl(g, gr_pri_bes_crop_status1_r()));
1313 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS : 0x%x\n",
1314 gk20a_readl(g, gr_pri_be0_zrop_status_r()));
1315 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS2 : 0x%x\n",
1316 gk20a_readl(g, gr_pri_be0_zrop_status2_r()));
1317 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS : 0x%x\n",
1318 gk20a_readl(g, gr_pri_bes_zrop_status_r()));
1319 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n",
1320 gk20a_readl(g, gr_pri_bes_zrop_status2_r()));
1321 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION: 0x%x\n",
1322 gk20a_readl(g, gr_pri_be0_becs_be_exception_r()));
1323 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN: 0x%x\n",
1324 gk20a_readl(g, gr_pri_be0_becs_be_exception_en_r()));
1325 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n",
1326 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r()));
1327 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n",
1328 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r()));
1329 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n",
1330 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
1331 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
1332 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
1333 return 0;
1334}
1335
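/*
 * GR activity registers pack one 3-bit status field per unit; treat a
 * register as idle only if every unit reports EMPTY or PREEMPTED.
 */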
1336static bool gr_activity_empty_or_preempted(u32 val)
1337{
1338	while (val) {
1339 u32 v = val & 7;
1340 if (v != gr_activity_4_gpc0_empty_v() &&
1341 v != gr_activity_4_gpc0_preempted_v())
1342 return false;
1343 val >>= 3;
1344 }
1345
1346 return true;
1347}
1348
1349static int gr_gp10b_wait_empty(struct gk20a *g, unsigned long end_jiffies,
1350 u32 expect_delay)
1351{
1352 u32 delay = expect_delay;
1353 bool gr_enabled;
1354 bool ctxsw_active;
1355 bool gr_busy;
1356 u32 gr_status;
1357 u32 activity0, activity1, activity2, activity4;
1358
1359 gk20a_dbg_fn("");
1360
1361 do {
1362 /* fmodel: host gets fifo_engine_status(gr) from gr
1363 only when gr_status is read */
1364 gr_status = gk20a_readl(g, gr_status_r());
1365
1366 gr_enabled = gk20a_readl(g, mc_enable_r()) &
1367 mc_enable_pgraph_enabled_f();
1368
1369 ctxsw_active = gr_status & 1<<7;
1370
1371 activity0 = gk20a_readl(g, gr_activity_0_r());
1372 activity1 = gk20a_readl(g, gr_activity_1_r());
1373 activity2 = gk20a_readl(g, gr_activity_2_r());
1374 activity4 = gk20a_readl(g, gr_activity_4_r());
1375
1376 gr_busy = !(gr_activity_empty_or_preempted(activity0) &&
1377 gr_activity_empty_or_preempted(activity1) &&
1378 activity2 == 0 &&
1379 gr_activity_empty_or_preempted(activity4));
1380
1381 if (!gr_enabled || (!gr_busy && !ctxsw_active)) {
1382 gk20a_dbg_fn("done");
1383 return 0;
1384 }
1385
1386 usleep_range(delay, delay * 2);
1387 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
1388
1389 } while (time_before(jiffies, end_jiffies)
1390 || !tegra_platform_is_silicon());
1391
1392 gk20a_err(dev_from_gk20a(g),
1393 "timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x",
1394 ctxsw_active, gr_busy, activity0, activity1, activity2, activity4);
1395
1396 return -EAGAIN;
1397}
1398
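/*
 * When a GfxP preemption buffer exists, the attribute CB is backed by the
 * per-context betacb ctxsw buffer; otherwise its size comes from the global
 * context buffer size calculation.
 */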
1399static void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
1400 struct channel_ctx_gk20a *ch_ctx,
1401 u64 addr, bool patch)
1402{
1403 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
1404 int attrBufferSize;
1405
1406 if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va)
1407 attrBufferSize = gr_ctx->t18x.betacb_ctxsw_buffer.size;
1408 else
1409 attrBufferSize = g->ops.gr.calc_global_ctx_buffer_size(g);
1410
1411 attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
1412
1413 gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch);
1414
1415 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
1416 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
1417 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
1418
1419 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
1420 gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch);
1421
1422 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
1423 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) |
1424 gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
1425}
1426
1427static void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
1428 struct channel_ctx_gk20a *ch_ctx,
1429 u64 addr, u64 size, bool patch)
1430{
1431 u32 data;
1432
1433 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
1434 gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
1435
1436 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
1437 gr_scc_bundle_cb_size_div_256b_f(size) |
1438 gr_scc_bundle_cb_size_valid_true_f(), patch);
1439
1440 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
1441 gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);
1442
1443 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
1444 gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
1445 gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
1446
1447 /* data for state_limit */
1448 data = (g->gr.bundle_cb_default_size *
1449 gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
1450 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
1451
1452 data = min_t(u32, data, g->gr.min_gpm_fifo_depth);
1453
1454 gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
1455 g->gr.bundle_cb_token_limit, data);
1456
1457 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
1458 gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
1459 gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
1460}
1461
1462static int gr_gp10b_load_smid_config(struct gk20a *g)
1463{
1464 u32 *tpc_sm_id;
1465 u32 i, j;
1466 u32 tpc_index, gpc_index;
1467 u32 max_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
1468
1469 tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL);
1470 if (!tpc_sm_id)
1471 return -ENOMEM;
1472
1473 /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
1474 for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) {
1475 u32 reg = 0;
1476 u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
1477 gr_cwd_gpc_tpc_id_tpc0_s();
1478
1479 for (j = 0; j < 4; j++) {
1480 u32 sm_id = (i * 4) + j;
1481 u32 bits;
1482
1483 if (sm_id >= g->gr.tpc_count)
1484 break;
1485
1486 gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
1487 tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
1488
1489 bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
1490 gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
1491 reg |= bits << (j * bit_stride);
1492
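			/*
			 * Each NV_PGRAPH_PRI_CWD_SM_ID register likewise packs
			 * four sm_id fields: the GPC index (with a second bank
			 * for TPCs 4-7) selects the register, tpc_index & 3
			 * the field within it.
			 */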
1493 tpc_sm_id[gpc_index + max_gpcs * ((tpc_index & 4) >> 2)]
1494 |= sm_id << (bit_stride * (tpc_index & 3));
1495 }
1496 gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
1497 }
1498
1499 for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
1500 gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
1501
1502 kfree(tpc_sm_id);
1503
1504 return 0;
1505}
1506
1507int gr_gp10b_init_fs_state(struct gk20a *g)
1508{
1509 u32 data;
1510
1511 gk20a_dbg_fn("");
1512
1513 data = gk20a_readl(g, gr_gpcs_tpcs_sm_texio_control_r());
1514 data = set_field(data, gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(),
1515 gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f());
1516 gk20a_writel(g, gr_gpcs_tpcs_sm_texio_control_r(), data);
1517
1518 data = gk20a_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r());
1519 data = set_field(data, gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_m(),
1520 gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f());
1521 gk20a_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data);
1522
1523 if (g->gr.t18x.fecs_feature_override_ecc_val != 0) {
1524 gk20a_writel(g,
1525 gr_fecs_feature_override_ecc_r(),
1526 g->gr.t18x.fecs_feature_override_ecc_val);
1527 }
1528
1529 return gr_gm20b_init_fs_state(g);
1530}
1531
1532static void gr_gp10b_init_cyclestats(struct gk20a *g)
1533{
1534#if defined(CONFIG_GK20A_CYCLE_STATS)
1535 g->gpu_characteristics.flags |=
1536 NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
1537 g->gpu_characteristics.flags |=
1538 NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT;
1539#else
1540 (void)g;
1541#endif
1542}
1543
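/*
 * Mirror the GPC0 TPC floorsweeping mask into the fuse block through the SW
 * fuse bypass: an enabled-TPC mask of 0x1 programs a disable value of 0x2
 * (TPC1 off), 0x2 programs 0x1 (TPC0 off), anything else leaves both TPCs
 * enabled.
 */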
1544static void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
1545{
1546 tegra_fuse_control_write(0x1, FUSE_FUSEBYPASS_0);
1547 tegra_fuse_control_write(0x0, FUSE_WRITE_ACCESS_SW_0);
1548
1549 if (g->gr.gpc_tpc_mask[gpc_index] == 0x1)
1550 tegra_fuse_control_write(0x2, FUSE_OPT_GPU_TPC0_DISABLE_0);
1551 else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2)
1552 tegra_fuse_control_write(0x1, FUSE_OPT_GPU_TPC0_DISABLE_0);
1553 else
1554 tegra_fuse_control_write(0x0, FUSE_OPT_GPU_TPC0_DISABLE_0);
1555}
1556
1557static void gr_gp10b_get_access_map(struct gk20a *g,
1558 u32 **whitelist, int *num_entries)
1559{
1560 static u32 wl_addr_gp10b[] = {
1561 /* this list must be sorted (low to high) */
1562 0x404468, /* gr_pri_mme_max_instructions */
1563 0x418300, /* gr_pri_gpcs_rasterarb_line_class */
1564 0x418800, /* gr_pri_gpcs_setup_debug */
1565 0x418e00, /* gr_pri_gpcs_swdx_config */
1566 0x418e40, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1567 0x418e44, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1568 0x418e48, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1569 0x418e4c, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1570 0x418e50, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1571 0x418e58, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1572 0x418e5c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1573 0x418e60, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1574 0x418e64, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1575 0x418e68, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1576 0x418e6c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1577 0x418e70, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1578 0x418e74, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1579 0x418e78, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1580 0x418e7c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1581 0x418e80, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1582 0x418e84, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1583 0x418e88, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1584 0x418e8c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1585 0x418e90, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1586 0x418e94, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1587 0x419864, /* gr_pri_gpcs_tpcs_pe_l2_evict_policy */
1588 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */
1589 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */
1590 0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */
1591 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */
1592 };
1593
1594 *whitelist = wl_addr_gp10b;
1595 *num_entries = ARRAY_SIZE(wl_addr_gp10b);
1596}
1597
1598static int gr_gp10b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a *fault_ch)
1599{
1600 int ret = 0;
1601
1602 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
1603
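	/*
	 * CILP teardown sequence: disable the faulting channel/TSG, resubmit
	 * its runlist so the disable takes effect, then issue a preempt so
	 * the context gets saved out.
	 */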
1604 ret = gk20a_disable_channel_tsg(g, fault_ch);
1605 if (ret) {
1606 gk20a_err(dev_from_gk20a(g),
1607 "CILP: failed to disable channel/TSG!\n");
1608 return ret;
1609 }
1610
1611 ret = g->ops.fifo.update_runlist(g, fault_ch->runlist_id, ~0, true, false);
1612 if (ret) {
1613 gk20a_err(dev_from_gk20a(g),
1614			"CILP: failed to restart runlist!");
1615 return ret;
1616 }
1617
1618 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: restarted runlist");
1619
1620 if (gk20a_is_channel_marked_as_tsg(fault_ch))
1621 gk20a_fifo_issue_preempt(g, fault_ch->tsgid, true);
1622 else
1623 gk20a_fifo_issue_preempt(g, fault_ch->hw_chid, false);
1624
1625 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: preempted the channel/tsg");
1626
1627 return ret;
1628}
1629
1630static int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk20a *fault_ch)
1631{
1632 int ret;
1633 struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
1634
1635 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
1636
1637 if (!gr_ctx)
1638 return -EINVAL;
1639
1640 if (gr_ctx->t18x.cilp_preempt_pending) {
1641 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1642 "CILP is already pending for chid %d",
1643 fault_ch->hw_chid);
1644 return 0;
1645 }
1646
1647 /* get ctx_id from the ucode image */
1648 if (!gr_ctx->t18x.ctx_id_valid) {
1649 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1650 "CILP: looking up ctx id");
1651 ret = gr_gk20a_get_ctx_id(g, fault_ch, &gr_ctx->t18x.ctx_id);
1652 if (ret) {
1653 gk20a_err(dev_from_gk20a(g), "CILP: error looking up ctx id!\n");
1654 return ret;
1655 }
1656 gr_ctx->t18x.ctx_id_valid = true;
1657 }
1658
1659 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1660 "CILP: ctx id is 0x%x", gr_ctx->t18x.ctx_id);
1661
1662 /* send ucode method to set ctxsw interrupt */
1663 ret = gr_gk20a_submit_fecs_sideband_method_op(g,
1664 (struct fecs_method_op_gk20a) {
1665 .method.data = gr_ctx->t18x.ctx_id,
1666 .method.addr =
1667 gr_fecs_method_push_adr_configure_interrupt_completion_option_v(),
1668 .mailbox = {
1669 .id = 1 /* sideband */, .data = 0,
1670 .clr = ~0, .ret = NULL,
1671 .ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
1672 .fail = 0},
1673 .cond.ok = GR_IS_UCODE_OP_EQUAL,
1674 .cond.fail = GR_IS_UCODE_OP_SKIP});
1675
1676 if (ret) {
1677 gk20a_err(dev_from_gk20a(g),
1678 "CILP: failed to enable ctxsw interrupt!");
1679 return ret;
1680 }
1681
1682 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1683 "CILP: enabled ctxsw completion interrupt");
1684
1685 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1686 "CILP: disabling channel %d",
1687 fault_ch->hw_chid);
1688
1689 ret = gr_gp10b_disable_channel_or_tsg(g, fault_ch);
1690 if (ret) {
1691 gk20a_err(dev_from_gk20a(g),
1692			"CILP: failed to disable channel/TSG!");
1693 return ret;
1694 }
1695
1696 /* set cilp_preempt_pending = true and record the channel */
1697 gr_ctx->t18x.cilp_preempt_pending = true;
1698 g->gr.t18x.cilp_preempt_pending_chid = fault_ch->hw_chid;
1699
1700 if (gk20a_is_channel_marked_as_tsg(fault_ch)) {
1701 struct tsg_gk20a *tsg = &g->fifo.tsg[fault_ch->tsgid];
1702
1703 gk20a_tsg_event_id_post_event(tsg,
1704 NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED);
1705 } else {
1706 gk20a_channel_event_id_post_event(fault_ch,
1707 NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED);
1708 }
1709
1710 return 0;
1711}
1712
1713static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g,
1714 struct channel_gk20a *fault_ch)
1715{
1716 struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
1717
1718 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
1719
1720 if (!gr_ctx)
1721 return -EINVAL;
1722
1723 /* The ucode is self-clearing, so all we need to do here is
1724 to clear cilp_preempt_pending. */
1725 if (!gr_ctx->t18x.cilp_preempt_pending) {
1726 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1727 "CILP is already cleared for chid %d\n",
1728 fault_ch->hw_chid);
1729 return 0;
1730 }
1731
1732 gr_ctx->t18x.cilp_preempt_pending = false;
1733 g->gr.t18x.cilp_preempt_pending_chid = -1;
1734
1735 return 0;
1736}
1737
1738/* @brief Pre-process SM exceptions to determine whether to clear them here.
1739 *
1740 * On Pascal, if the channel is in CILP preemption mode, preempt it and handle the errors with special processing.
1741 */
1742static int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
1743 u32 gpc, u32 tpc, u32 global_esr, u32 warp_esr,
1744 bool sm_debugger_attached, struct channel_gk20a *fault_ch,
1745 bool *early_exit, bool *ignore_debugger)
1746{
1747 int ret;
1748 bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode ==
1749			NVGPU_COMPUTE_PREEMPTION_MODE_CILP);
1750 u32 global_mask = 0, dbgr_control0, global_esr_copy;
1751 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1752 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
1753 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
1754
1755 *early_exit = false;
1756 *ignore_debugger = false;
1757
1758 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n",
1759 gpc, tpc, global_esr);
1760
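	/*
	 * With CILP enabled and an SM debugger attached, handle the
	 * exception here: acknowledge the benign breakpoint/single-step
	 * interrupts, lock down the SM(s) on any remaining warp or global
	 * error, clear the HWW state, arm a CILP preempt for the faulting
	 * channel and resume the SM.
	 */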
1761 if (cilp_enabled && sm_debugger_attached) {
1762 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f())
1763 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
1764 gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f());
1765
1766 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f())
1767 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
1768 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f());
1769
1770 global_mask = gr_gpc0_tpc0_sm_hww_global_esr_sm_to_sm_fault_pending_f() |
1771 gr_gpcs_tpcs_sm_hww_global_esr_l1_error_pending_f() |
1772 gr_gpcs_tpcs_sm_hww_global_esr_multiple_warp_errors_pending_f() |
1773 gr_gpcs_tpcs_sm_hww_global_esr_physical_stack_overflow_error_pending_f() |
1774 gr_gpcs_tpcs_sm_hww_global_esr_timeout_error_pending_f() |
1775 gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f();
1776
1777 if (warp_esr != 0 || (global_esr & global_mask) != 0) {
1778 *ignore_debugger = true;
1779
1780 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1781 "CILP: starting wait for LOCKED_DOWN on gpc %d tpc %d\n",
1782 gpc, tpc);
1783
1784 if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch)) {
1785 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1786 "CILP: Broadcasting STOP_TRIGGER from gpc %d tpc %d\n",
1787 gpc, tpc);
1788 gk20a_suspend_all_sms(g, global_mask, false);
1789
1790 gk20a_dbg_gpu_clear_broadcast_stop_trigger(fault_ch);
1791 } else {
1792 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1793 "CILP: STOP_TRIGGER from gpc %d tpc %d\n",
1794 gpc, tpc);
1795 gk20a_suspend_single_sm(g, gpc, tpc, global_mask, true);
1796 }
1797
1798 /* reset the HWW errors after locking down */
1799 global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
1800 gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy);
1801 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1802 "CILP: HWWs cleared for gpc %d tpc %d\n",
1803 gpc, tpc);
1804
1805 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n");
1806 ret = gr_gp10b_set_cilp_preempt_pending(g, fault_ch);
1807 if (ret) {
1808 gk20a_err(dev_from_gk20a(g), "CILP: error while setting CILP preempt pending!\n");
1809 return ret;
1810 }
1811
1812 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
1813 if (dbgr_control0 & gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_enable_f()) {
1814 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1815 "CILP: clearing SINGLE_STEP_MODE before resume for gpc %d tpc %d\n",
1816 gpc, tpc);
1817 dbgr_control0 = set_field(dbgr_control0,
1818 gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_m(),
1819 gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_disable_f());
1820 gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
1821 }
1822
1823 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1824 "CILP: resume for gpc %d tpc %d\n",
1825 gpc, tpc);
1826 gk20a_resume_single_sm(g, gpc, tpc);
1827
1828 *ignore_debugger = true;
1829 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: All done on gpc %d, tpc %d\n", gpc, tpc);
1830 }
1831
1832 *early_exit = true;
1833 }
1834 return 0;
1835}
1836
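/*
 * Look up the channel id recorded when a CILP preempt was armed.  Returns
 * -EINVAL if no preempt is pending or the channel cannot be referenced.
 */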
1837static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
1838{
1839 struct gr_ctx_desc *gr_ctx;
1840 struct channel_gk20a *ch;
1841 int chid;
1842 int ret = -EINVAL;
1843
1844 chid = g->gr.t18x.cilp_preempt_pending_chid;
1845
1846 ch = gk20a_channel_get(gk20a_fifo_channel_from_hw_chid(g, chid));
1847 if (!ch)
1848 return ret;
1849
1850 gr_ctx = ch->ch_ctx.gr_ctx;
1851
1852 if (gr_ctx->t18x.cilp_preempt_pending) {
1853 *__chid = chid;
1854 ret = 0;
1855 }
1856
1857 gk20a_channel_put(ch);
1858
1859 return ret;
1860}
1861
1862static int gr_gp10b_handle_fecs_error(struct gk20a *g,
1863 struct channel_gk20a *__ch,
1864 struct gr_gk20a_isr_data *isr_data)
1865{
1866 u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
1867 struct channel_gk20a *ch;
1868 int chid = -1;
1869 int ret = 0;
1870
1871 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
1872
1873 /*
1874	 * CTXSW_INTR1 (bit 1 of HOST_INT_STATUS_CTXSW_INTR, i.e. field
1875	 * value 2 below) indicates that a CILP ctxsw save has finished
1876 */
1877 if (gr_fecs_intr & gr_fecs_host_int_status_ctxsw_intr_f(2)) {
1878 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1879 "CILP: ctxsw save completed!\n");
1880
1881 /* now clear the interrupt */
1882 gk20a_writel(g, gr_fecs_host_int_clear_r(),
1883 gr_fecs_host_int_clear_ctxsw_intr1_clear_f());
1884
1885 ret = gr_gp10b_get_cilp_preempt_pending_chid(g, &chid);
1886 if (ret)
1887 goto clean_up;
1888
1889 ch = gk20a_channel_get(
1890 gk20a_fifo_channel_from_hw_chid(g, chid));
1891 if (!ch)
1892 goto clean_up;
1893
1894
1895 /* set preempt_pending to false */
1896 ret = gr_gp10b_clear_cilp_preempt_pending(g, ch);
1897 if (ret) {
1898 gk20a_err(dev_from_gk20a(g), "CILP: error while unsetting CILP preempt pending!\n");
1899 gk20a_channel_put(ch);
1900 goto clean_up;
1901 }
1902
1903 /* Post events to UMD */
1904 gk20a_dbg_gpu_post_events(ch);
1905
1906 if (gk20a_is_channel_marked_as_tsg(ch)) {
1907 struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
1908
1909 gk20a_tsg_event_id_post_event(tsg,
1910 NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE);
1911 } else {
1912 gk20a_channel_event_id_post_event(ch,
1913 NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE);
1914 }
1915
1916 gk20a_channel_put(ch);
1917 }
1918
1919clean_up:
1920 /* handle any remaining interrupts */
1921 return gk20a_gr_handle_fecs_error(g, __ch, isr_data);
1922}
1923
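/*
 * If the warp ESR does not report a valid fault address, clear the stale
 * address error type so that callers do not act on it.
 */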
1924static u32 gp10b_mask_hww_warp_esr(u32 hww_warp_esr)
1925{
1926 if (!(hww_warp_esr & gr_gpc0_tpc0_sm_hww_warp_esr_addr_valid_m()))
1927 hww_warp_esr = set_field(hww_warp_esr,
1928 gr_gpc0_tpc0_sm_hww_warp_esr_addr_error_type_m(),
1929 gr_gpc0_tpc0_sm_hww_warp_esr_addr_error_type_none_f());
1930
1931 return hww_warp_esr;
1932}
1933
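/*
 * Report the FECS ECC feature override register only when the ECC enable
 * fuse is blown; otherwise treat the override as zero.
 */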
1934static u32 get_ecc_override_val(struct gk20a *g)
1935{
1936	u32 val = 0;
1937
1938 tegra_fuse_readl(FUSE_OPT_ECC_EN, &val);
1939 if (val)
1940 return gk20a_readl(g, gr_fecs_feature_override_ecc_r());
1941
1942 return 0;
1943}
1944
1945static bool gr_gp10b_suspend_context(struct channel_gk20a *ch,
1946 bool *cilp_preempt_pending)
1947{
1948 struct gk20a *g = ch->g;
1949 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
1950 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
1951 bool ctx_resident = false;
1952 int err = 0;
1953
1954 *cilp_preempt_pending = false;
1955
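	/*
	 * If this channel's context is resident on the GPU, stop its SMs
	 * and, for CILP contexts, arm a CILP preempt so the context is
	 * saved out; otherwise simply disable the channel/TSG.
	 */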
1956 if (gk20a_is_channel_ctx_resident(ch)) {
1957 gk20a_suspend_all_sms(g, 0, false);
1958
1959 if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) {
1960 err = gr_gp10b_set_cilp_preempt_pending(g, ch);
1961 if (err)
1962 gk20a_err(dev_from_gk20a(g),
1963 "unable to set CILP preempt pending\n");
1964 else
1965 *cilp_preempt_pending = true;
1966
1967 gk20a_resume_all_sms(g);
1968 }
1969
1970 ctx_resident = true;
1971 } else {
1972 gk20a_disable_channel_tsg(g, ch);
1973 }
1974
1975 return ctx_resident;
1976}
1977
1978static int gr_gp10b_suspend_contexts(struct gk20a *g,
1979 struct dbg_session_gk20a *dbg_s,
1980 int *ctx_resident_ch_fd)
1981{
1982 u32 delay = GR_IDLE_CHECK_DEFAULT;
1983 bool cilp_preempt_pending = false;
1984 struct channel_gk20a *cilp_preempt_pending_ch = NULL;
1985 struct channel_gk20a *ch;
1986 struct dbg_session_channel_data *ch_data;
1987 int err = 0;
1988 int local_ctx_resident_ch_fd = -1;
1989 bool ctx_resident;
1990
1991 mutex_lock(&g->dbg_sessions_lock);
1992
1993 err = gr_gk20a_disable_ctxsw(g);
1994 if (err) {
1995 gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw");
1996 mutex_unlock(&g->dbg_sessions_lock);
1997 goto clean_up;
1998 }
1999
2000 mutex_lock(&dbg_s->ch_list_lock);
2001
2002 list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry) {
2003 ch = g->fifo.channel + ch_data->chid;
2004
2005 ctx_resident = gr_gp10b_suspend_context(ch,
2006 &cilp_preempt_pending);
2007 if (ctx_resident)
2008 local_ctx_resident_ch_fd = ch_data->channel_fd;
2009 if (cilp_preempt_pending)
2010 cilp_preempt_pending_ch = ch;
2011 }
2012
2013 mutex_unlock(&dbg_s->ch_list_lock);
2014
2015 err = gr_gk20a_enable_ctxsw(g);
2016 if (err) {
2017 mutex_unlock(&g->dbg_sessions_lock);
2018 goto clean_up;
2019 }
2020
2021 mutex_unlock(&g->dbg_sessions_lock);
2022
2023 if (cilp_preempt_pending_ch) {
2024 struct channel_ctx_gk20a *ch_ctx =
2025 &cilp_preempt_pending_ch->ch_ctx;
2026 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
2027 unsigned long end_jiffies = jiffies +
2028 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
2029
2030 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
2031 "CILP preempt pending, waiting %lu msecs for preemption",
2032 gk20a_get_gr_idle_timeout(g));
2033
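		/*
		 * Wait for the FECS ctxsw-save interrupt handler to clear
		 * cilp_preempt_pending; on non-silicon platforms the loop
		 * deliberately ignores the timeout.
		 */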
2034 do {
2035 if (!gr_ctx->t18x.cilp_preempt_pending)
2036 break;
2037
2038 usleep_range(delay, delay * 2);
2039 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
2040 } while (time_before(jiffies, end_jiffies)
2041 || !tegra_platform_is_silicon());
2042
2043 /* If cilp is still pending at this point, timeout */
2044 if (gr_ctx->t18x.cilp_preempt_pending)
2045 err = -ETIMEDOUT;
2046 }
2047
2048 *ctx_resident_ch_fd = local_ctx_resident_ch_fd;
2049
2050clean_up:
2051 return err;
2052}
2053
2054static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
2055 u32 graphics_preempt_mode,
2056 u32 compute_preempt_mode)
2057{
2058 struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx;
2059 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
2060 struct gk20a *g = ch->g;
2061 struct tsg_gk20a *tsg;
2062 struct vm_gk20a *vm;
2063 struct mem_desc *mem = &gr_ctx->mem;
2064 u32 class;
2065 int err = 0;
2066
2067 class = ch->obj_class;
2068 if (!class)
2069 return -EINVAL;
2070
2071 if (gk20a_is_channel_marked_as_tsg(ch)) {
2072 tsg = &g->fifo.tsg[ch->tsgid];
2073 vm = tsg->vm;
2074 } else {
2075 vm = ch->vm;
2076 }
2077
2078 /* skip setting anything if both modes are already set */
2079 if (graphics_preempt_mode &&
2080 (graphics_preempt_mode == gr_ctx->graphics_preempt_mode))
2081 graphics_preempt_mode = 0;
2082
2083 if (compute_preempt_mode &&
2084 (compute_preempt_mode == gr_ctx->compute_preempt_mode))
2085 compute_preempt_mode = 0;
2086
2087 if (graphics_preempt_mode == 0 && compute_preempt_mode == 0)
2088 return 0;
2089
2090 if (g->ops.gr.set_ctxsw_preemption_mode) {
2091 err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
2092 graphics_preempt_mode, compute_preempt_mode);
2093 if (err) {
2094 gk20a_err(dev_from_gk20a(g),
2095 "set_ctxsw_preemption_mode failed");
2096 return err;
2097 }
2098 }
2099
2100 if (gk20a_mem_begin(g, mem))
2101 return -ENOMEM;
2102
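	/*
	 * The channel/TSG must be disabled and preempted off the GPU before
	 * its context image and patch buffer are rewritten with the new
	 * preemption settings.
	 */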
2103 err = gk20a_disable_channel_tsg(g, ch);
2104 if (err)
2105 goto unmap_ctx;
2106
2107 err = gk20a_fifo_preempt(g, ch);
2108 if (err)
2109 goto enable_ch;
2110
2111 if (g->ops.gr.update_ctxsw_preemption_mode) {
2112 g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem);
2113
2114 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
2115 if (err) {
2116 gk20a_err(dev_from_gk20a(g),
2117 "can't map patch context");
2118 goto enable_ch;
2119 }
2120 g->ops.gr.commit_global_cb_manager(g, ch, true);
2121 gr_gk20a_ctx_patch_write_end(g, ch_ctx);
2122 }
2123
2124enable_ch:
2125 gk20a_enable_channel_tsg(g, ch);
2126unmap_ctx:
2127 gk20a_mem_end(g, mem);
2128
2129 return err;
2130}
2131
2132static int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
2133 struct nvgpu_preemption_modes_rec *preemption_modes_rec)
2134{
2135 preemption_modes_rec->graphics_preemption_mode_flags = (
2136 NVGPU_GRAPHICS_PREEMPTION_MODE_WFI |
2137 NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP);
2138 preemption_modes_rec->compute_preemption_mode_flags = (
2139 NVGPU_COMPUTE_PREEMPTION_MODE_WFI |
2140 NVGPU_COMPUTE_PREEMPTION_MODE_CTA |
2141 NVGPU_COMPUTE_PREEMPTION_MODE_CILP);
2142
2143 preemption_modes_rec->default_graphics_preempt_mode =
2144 NVGPU_GRAPHICS_PREEMPTION_MODE_WFI;
2145 preemption_modes_rec->default_compute_preempt_mode =
2146 NVGPU_COMPUTE_PREEMPTION_MODE_WFI;
2147
2148 return 0;
2149}

2150static int gp10b_gr_fuse_override(struct gk20a *g)
2151{
2152 struct device_node *np = g->dev->of_node;
2153 u32 *fuses;
2154 int count, i;
2155
2156	if (!np) /* may be a PCIe device */
2157 return 0;
2158
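	/*
	 * "fuse-overrides" is a list of <fuse-id value> u32 pairs (8 bytes
	 * per entry), so count pairs are present and count * 2 cells are
	 * read below.
	 */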
2159 count = of_property_count_elems_of_size(np, "fuse-overrides", 8);
2160 if (count <= 0)
2161 return count;
2162
2163	fuses = kmalloc_array(count, 2 * sizeof(u32), GFP_KERNEL);
2164 if (!fuses)
2165 return -ENOMEM;
2166 of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2);
2167 for (i = 0; i < count; i++) {
2168 u32 fuse, value;
2169
2170 fuse = fuses[2 * i];
2171 value = fuses[2 * i + 1];
2172 switch (fuse) {
2173 case GM20B_FUSE_OPT_TPC_DISABLE:
2174 gm20b_gr_tpc_disable_override(g, value);
2175 break;
2176 case GP10B_FUSE_OPT_ECC_EN:
2177 g->gr.t18x.fecs_feature_override_ecc_val = value;
2178 break;
2179 default:
2180 gk20a_err(dev_from_gk20a(g),
2181 "ignore unknown fuse override %08x", fuse);
2182 break;
2183 }
2184 }
2185
2186 kfree(fuses);
2187
2188 return 0;
2189}
2190
2191static int gr_gp10b_init_preemption_state(struct gk20a *g)
2192{
2193 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
2194 u32 debug_2;
2195 u64 sysclk_rate;
2196 u32 sysclk_cycles;
2197
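	/*
	 * Convert the GFXP WFI timeout from microseconds to sysclk cycles
	 * at the current clock rate, program it, and force a WFI to always
	 * be injected on GFXP preemption requests.
	 */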
2198 sysclk_rate = platform->clk_get_rate(g->dev);
2199 sysclk_cycles = (u32)((sysclk_rate * NVGPU_GFXP_WFI_TIMEOUT_US) / 1000000ULL);
2200 gk20a_writel(g, gr_fe_gfxp_wfi_timeout_r(),
2201 gr_fe_gfxp_wfi_timeout_count_f(sysclk_cycles));
2202
2203 debug_2 = gk20a_readl(g, gr_debug_2_r());
2204 debug_2 = set_field(debug_2,
2205 gr_debug_2_gfxp_wfi_always_injects_wfi_m(),
2206 gr_debug_2_gfxp_wfi_always_injects_wfi_enabled_f());
2207 gk20a_writel(g, gr_debug_2_r(), debug_2);
2208
2209 return 0;
2210}
2211
2212void gp10b_init_gr(struct gpu_ops *gops)
2213{
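	/* Inherit the gm20b HALs, then override the Pascal-specific hooks. */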
2214 gm20b_init_gr(gops);
2215 gops->gr.init_fs_state = gr_gp10b_init_fs_state;
2216 gops->gr.init_preemption_state = gr_gp10b_init_preemption_state;
2217 gops->gr.is_valid_class = gr_gp10b_is_valid_class;
2218 gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager;
2219 gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
2220 gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;
2221 gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;
2222 gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size;
2223 gops->gr.calc_global_ctx_buffer_size =
2224 gr_gp10b_calc_global_ctx_buffer_size;
2225 gops->gr.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb;
2226 gops->gr.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb;
2227 gops->gr.handle_sw_method = gr_gp10b_handle_sw_method;
2228 gops->gr.cb_size_default = gr_gp10b_cb_size_default;
2229 gops->gr.set_alpha_circular_buffer_size =
2230 gr_gp10b_set_alpha_circular_buffer_size;
2231 gops->gr.set_circular_buffer_size =
2232 gr_gp10b_set_circular_buffer_size;
2233 gops->gr.init_ctx_state = gr_gp10b_init_ctx_state;
2234 gops->gr.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx;
2235 gops->gr.free_gr_ctx = gr_gp10b_free_gr_ctx;
2236 gops->gr.update_ctxsw_preemption_mode =
2237 gr_gp10b_update_ctxsw_preemption_mode;
2238 gops->gr.dump_gr_regs = gr_gp10b_dump_gr_status_regs;
2239 gops->gr.wait_empty = gr_gp10b_wait_empty;
2240 gops->gr.init_cyclestats = gr_gp10b_init_cyclestats;
2241 gops->gr.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask;
2242 gops->gr.get_access_map = gr_gp10b_get_access_map;
2243 gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception;
2244 gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception;
2245 gops->gr.mask_hww_warp_esr = gp10b_mask_hww_warp_esr;
2246 gops->gr.pre_process_sm_exception =
2247 gr_gp10b_pre_process_sm_exception;
2248 gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error;
2249 gops->gr.create_gr_sysfs = gr_gp10b_create_sysfs;
2250 gops->gr.get_lrf_tex_ltc_dram_override = get_ecc_override_val;
2251 gops->gr.suspend_contexts = gr_gp10b_suspend_contexts;
2252 gops->gr.set_preemption_mode = gr_gp10b_set_preemption_mode;
2253 gops->gr.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode;
2254 gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags;
2255 gops->gr.fuse_override = gp10b_gr_fuse_override;
2256 gops->gr.load_smid_config = gr_gp10b_load_smid_config;
2257}