Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r--  drivers/gpu/nvgpu/gv11b/gr_gv11b.c  3639
1 file changed, 3639 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
new file mode 100644
index 00000000..3d817d7e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -0,0 +1,3639 @@
1/*
2 * GV11b GPU GR
3 *
4 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <linux/delay.h>
26#include <linux/version.h>
27#include <linux/vmalloc.h>
28#include <linux/tegra_gpu_t19x.h>
29#include <uapi/linux/nvgpu.h>
30
31#include <soc/tegra/fuse.h>
32
33#include <nvgpu/timers.h>
34#include <nvgpu/gmmu.h>
35#include <nvgpu/dma.h>
36#include <nvgpu/log.h>
37#include <nvgpu/debug.h>
38#include <nvgpu/enabled.h>
39
40#include "gk20a/gk20a.h"
41#include "gk20a/gr_gk20a.h"
42#include "gk20a/dbg_gpu_gk20a.h"
43#include "gk20a/regops_gk20a.h"
44#include "gk20a/gr_pri_gk20a.h"
45
46#include "gm20b/gr_gm20b.h"
47
48#include "gp10b/gr_gp10b.h"
49
50#include "gv11b/gr_gv11b.h"
51#include "gv11b/mm_gv11b.h"
52#include "gv11b/subctx_gv11b.h"
53
54#include <nvgpu/hw/gv11b/hw_gr_gv11b.h>
55#include <nvgpu/hw/gv11b/hw_fifo_gv11b.h>
56#include <nvgpu/hw/gv11b/hw_proj_gv11b.h>
57#include <nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h>
58#include <nvgpu/hw/gv11b/hw_mc_gv11b.h>
59#include <nvgpu/hw/gv11b/hw_ram_gv11b.h>
60#include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h>
61#include <nvgpu/hw/gv11b/hw_therm_gv11b.h>
62#include <nvgpu/hw/gv11b/hw_fb_gv11b.h>
63
64bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
65{
66 bool valid = false;
67
68 switch (class_num) {
69 case VOLTA_COMPUTE_A:
70 case VOLTA_A:
71 case VOLTA_DMA_COPY_A:
72 valid = true;
73 break;
74
75 case MAXWELL_COMPUTE_B:
76 case MAXWELL_B:
77 case FERMI_TWOD_A:
78 case KEPLER_DMA_COPY_A:
79 case MAXWELL_DMA_COPY_A:
80 case PASCAL_COMPUTE_A:
81 case PASCAL_A:
82 case PASCAL_DMA_COPY_A:
83 valid = true;
84 break;
85
86 default:
87 break;
88 }
89 gk20a_dbg_info("class=0x%x valid=%d", class_num, valid);
90 return valid;
91}
92
93bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
94{
95 bool valid = false;
96
97 switch (class_num) {
98 case VOLTA_A:
99 case PASCAL_A:
100 case MAXWELL_B:
101 valid = true;
102 break;
103
104 default:
105 break;
106 }
107 return valid;
108}
109
110bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
111{
112 bool valid = false;
113
114 switch (class_num) {
115 case VOLTA_COMPUTE_A:
116 case PASCAL_COMPUTE_A:
117 case MAXWELL_COMPUTE_B:
118 valid = true;
119 break;
120
121 default:
122 break;
123 }
124 return valid;
125}
126
127static u32 gv11b_gr_sm_offset(struct gk20a *g, u32 sm)
128{
129
130 u32 sm_pri_stride = nvgpu_get_litter_value(g, GPU_LIT_SM_PRI_STRIDE);
131 u32 sm_offset = sm_pri_stride * sm;
132
133 return sm_offset;
134}
135
136static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
137 bool *post_event, struct channel_gk20a *fault_ch,
138 u32 *hww_global_esr)
139{
140 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
141 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
142 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
143 u32 l1_tag_ecc_status, l1_tag_ecc_corrected_err_status = 0;
144 u32 l1_tag_ecc_uncorrected_err_status = 0;
145 u32 l1_tag_corrected_err_count_delta = 0;
146 u32 l1_tag_uncorrected_err_count_delta = 0;
147 bool is_l1_tag_ecc_corrected_total_err_overflow = 0;
148 bool is_l1_tag_ecc_uncorrected_total_err_overflow = 0;
149
150 /* Check for L1 tag ECC errors. */
151 l1_tag_ecc_status = gk20a_readl(g,
152 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_r() + offset);
153 l1_tag_ecc_corrected_err_status = l1_tag_ecc_status &
154 (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_0_m() |
155 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_1_m() |
156 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_pixrpf_m() |
157 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_miss_fifo_m());
158 l1_tag_ecc_uncorrected_err_status = l1_tag_ecc_status &
159 (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_0_m() |
160 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_1_m() |
161 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_pixrpf_m() |
162 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_miss_fifo_m());
163
164 if ((l1_tag_ecc_corrected_err_status == 0) && (l1_tag_ecc_uncorrected_err_status == 0))
165 return 0;
166
167 l1_tag_corrected_err_count_delta =
168 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_v(
169 gk20a_readl(g,
170 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() +
171 offset));
172 l1_tag_uncorrected_err_count_delta =
173 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_v(
174 gk20a_readl(g,
175 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() +
176 offset));
177 is_l1_tag_ecc_corrected_total_err_overflow =
178 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_total_counter_overflow_v(l1_tag_ecc_status);
179 is_l1_tag_ecc_uncorrected_total_err_overflow =
180 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_total_counter_overflow_v(l1_tag_ecc_status);
181
182 if ((l1_tag_corrected_err_count_delta > 0) || is_l1_tag_ecc_corrected_total_err_overflow) {
183 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
184 "corrected error (SBE) detected in SM L1 tag! err_mask [%08x] is_overf [%d]",
185 l1_tag_ecc_corrected_err_status, is_l1_tag_ecc_corrected_total_err_overflow);
186
 187		/* HW uses 16-bit counter */
188 l1_tag_corrected_err_count_delta +=
189 (is_l1_tag_ecc_corrected_total_err_overflow <<
190 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s());
191 g->ecc.gr.t19x.sm_l1_tag_corrected_err_count.counters[tpc] +=
192 l1_tag_corrected_err_count_delta;
193 gk20a_writel(g,
194 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset,
195 0);
196 }
197 if ((l1_tag_uncorrected_err_count_delta > 0) || is_l1_tag_ecc_uncorrected_total_err_overflow) {
198 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
199 "Uncorrected error (DBE) detected in SM L1 tag! err_mask [%08x] is_overf [%d]",
200 l1_tag_ecc_uncorrected_err_status, is_l1_tag_ecc_uncorrected_total_err_overflow);
201
 202		/* HW uses 16-bit counter */
203 l1_tag_uncorrected_err_count_delta +=
204 (is_l1_tag_ecc_uncorrected_total_err_overflow <<
205 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s());
206 g->ecc.gr.t19x.sm_l1_tag_uncorrected_err_count.counters[tpc] +=
207 l1_tag_uncorrected_err_count_delta;
208 gk20a_writel(g,
209 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset,
210 0);
211 }
212
213 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_r() + offset,
214 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_reset_task_f());
215
216 return 0;
217
218}
219
220static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
221 bool *post_event, struct channel_gk20a *fault_ch,
222 u32 *hww_global_esr)
223{
224 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
225 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
226 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
227 u32 lrf_ecc_status, lrf_ecc_corrected_err_status = 0;
228 u32 lrf_ecc_uncorrected_err_status = 0;
229 u32 lrf_corrected_err_count_delta = 0;
230 u32 lrf_uncorrected_err_count_delta = 0;
231 bool is_lrf_ecc_corrected_total_err_overflow = 0;
232 bool is_lrf_ecc_uncorrected_total_err_overflow = 0;
233
234 /* Check for LRF ECC errors. */
235 lrf_ecc_status = gk20a_readl(g,
236 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset);
237 lrf_ecc_corrected_err_status = lrf_ecc_status &
238 (gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp0_m() |
239 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp1_m() |
240 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp2_m() |
241 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp3_m() |
242 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp4_m() |
243 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp5_m() |
244 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp6_m() |
245 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp7_m());
246 lrf_ecc_uncorrected_err_status = lrf_ecc_status &
247 (gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp0_m() |
248 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp1_m() |
249 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp2_m() |
250 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp3_m() |
251 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp4_m() |
252 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp5_m() |
253 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp6_m() |
254 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp7_m());
255
256 if ((lrf_ecc_corrected_err_status == 0) && (lrf_ecc_uncorrected_err_status == 0))
257 return 0;
258
259 lrf_corrected_err_count_delta =
260 gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_v(
261 gk20a_readl(g,
262 gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() +
263 offset));
264 lrf_uncorrected_err_count_delta =
265 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_v(
266 gk20a_readl(g,
267 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() +
268 offset));
269 is_lrf_ecc_corrected_total_err_overflow =
270 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_total_counter_overflow_v(lrf_ecc_status);
271 is_lrf_ecc_uncorrected_total_err_overflow =
272 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_total_counter_overflow_v(lrf_ecc_status);
273
274 if ((lrf_corrected_err_count_delta > 0) || is_lrf_ecc_corrected_total_err_overflow) {
275 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
276 "corrected error (SBE) detected in SM LRF! err_mask [%08x] is_overf [%d]",
277 lrf_ecc_corrected_err_status, is_lrf_ecc_corrected_total_err_overflow);
278
 279		/* HW uses 16-bit counter */
280 lrf_corrected_err_count_delta +=
281 (is_lrf_ecc_corrected_total_err_overflow <<
282 gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s());
283 g->ecc.gr.t18x.sm_lrf_single_err_count.counters[tpc] +=
284 lrf_corrected_err_count_delta;
285 gk20a_writel(g,
286 gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset,
287 0);
288 }
289 if ((lrf_uncorrected_err_count_delta > 0) || is_lrf_ecc_uncorrected_total_err_overflow) {
290 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
291 "Uncorrected error (DBE) detected in SM LRF! err_mask [%08x] is_overf [%d]",
292 lrf_ecc_uncorrected_err_status, is_lrf_ecc_uncorrected_total_err_overflow);
293
 294		/* HW uses 16-bit counter */
295 lrf_uncorrected_err_count_delta +=
296 (is_lrf_ecc_uncorrected_total_err_overflow <<
297 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s());
298 g->ecc.gr.t18x.sm_lrf_double_err_count.counters[tpc] +=
299 lrf_uncorrected_err_count_delta;
300 gk20a_writel(g,
301 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset,
302 0);
303 }
304
305 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
306 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_reset_task_f());
307
308 return 0;
309
310}
311
312void gr_gv11b_enable_hww_exceptions(struct gk20a *g)
313{
314 /* enable exceptions */
315 gk20a_writel(g, gr_fe_hww_esr_r(),
316 gr_fe_hww_esr_en_enable_f() |
317 gr_fe_hww_esr_reset_active_f());
318 gk20a_writel(g, gr_memfmt_hww_esr_r(),
319 gr_memfmt_hww_esr_en_enable_f() |
320 gr_memfmt_hww_esr_reset_active_f());
321}
322
323void gr_gv11b_enable_exceptions(struct gk20a *g)
324{
325 struct gr_gk20a *gr = &g->gr;
326 u32 reg_val;
327
328 /*
329 * clear exceptions :
 330	 * other than SM : hww_esr are reset in *enable_hww_exceptions*
331 * SM : cleared in *set_hww_esr_report_mask*
332 */
333
334 /* enable exceptions */
335 gk20a_writel(g, gr_exception2_en_r(), 0x0); /* BE not enabled */
336 gk20a_writel(g, gr_exception1_en_r(), (1 << gr->gpc_count) - 1);
337
338 reg_val = gr_exception_en_fe_enabled_f() |
339 gr_exception_en_memfmt_enabled_f() |
340 gr_exception_en_ds_enabled_f() |
341 gr_exception_en_gpc_enabled_f();
342 gk20a_writel(g, gr_exception_en_r(), reg_val);
343
344}
345
346static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
347 bool *post_event, struct channel_gk20a *fault_ch,
348 u32 *hww_global_esr)
349{
350 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
351 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
352 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
353 u32 cbu_ecc_status, cbu_ecc_corrected_err_status = 0;
354 u32 cbu_ecc_uncorrected_err_status = 0;
355 u32 cbu_corrected_err_count_delta = 0;
356 u32 cbu_uncorrected_err_count_delta = 0;
357 bool is_cbu_ecc_corrected_total_err_overflow = 0;
358 bool is_cbu_ecc_uncorrected_total_err_overflow = 0;
359
360 /* Check for CBU ECC errors. */
361 cbu_ecc_status = gk20a_readl(g,
362 gr_pri_gpc0_tpc0_sm_cbu_ecc_status_r() + offset);
363 cbu_ecc_corrected_err_status = cbu_ecc_status &
364 (gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_warp_sm0_m() |
365 gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_warp_sm1_m() |
366 gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_barrier_sm0_m() |
367 gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_barrier_sm1_m());
368 cbu_ecc_uncorrected_err_status = cbu_ecc_status &
369 (gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_warp_sm0_m() |
370 gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_warp_sm1_m() |
371 gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_barrier_sm0_m() |
372 gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_barrier_sm1_m());
373
374 if ((cbu_ecc_corrected_err_status == 0) && (cbu_ecc_uncorrected_err_status == 0))
375 return 0;
376
377 cbu_corrected_err_count_delta =
378 gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_v(
379 gk20a_readl(g,
380 gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() +
381 offset));
382 cbu_uncorrected_err_count_delta =
383 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_v(
384 gk20a_readl(g,
385 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() +
386 offset));
387 is_cbu_ecc_corrected_total_err_overflow =
388 gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_total_counter_overflow_v(cbu_ecc_status);
389 is_cbu_ecc_uncorrected_total_err_overflow =
390 gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_total_counter_overflow_v(cbu_ecc_status);
391
392 if ((cbu_corrected_err_count_delta > 0) || is_cbu_ecc_corrected_total_err_overflow) {
393 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
394 "corrected error (SBE) detected in SM CBU! err_mask [%08x] is_overf [%d]",
395 cbu_ecc_corrected_err_status, is_cbu_ecc_corrected_total_err_overflow);
396
 397		/* HW uses 16-bit counter */
398 cbu_corrected_err_count_delta +=
399 (is_cbu_ecc_corrected_total_err_overflow <<
400 gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s());
401 g->ecc.gr.t19x.sm_cbu_corrected_err_count.counters[tpc] +=
402 cbu_corrected_err_count_delta;
403 gk20a_writel(g,
404 gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset,
405 0);
406 }
407 if ((cbu_uncorrected_err_count_delta > 0) || is_cbu_ecc_uncorrected_total_err_overflow) {
408 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
409 "Uncorrected error (DBE) detected in SM CBU! err_mask [%08x] is_overf [%d]",
410 cbu_ecc_uncorrected_err_status, is_cbu_ecc_uncorrected_total_err_overflow);
411
 412		/* HW uses 16-bit counter */
413 cbu_uncorrected_err_count_delta +=
414 (is_cbu_ecc_uncorrected_total_err_overflow <<
415 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s());
416 g->ecc.gr.t19x.sm_cbu_uncorrected_err_count.counters[tpc] +=
417 cbu_uncorrected_err_count_delta;
418 gk20a_writel(g,
419 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset,
420 0);
421 }
422
423 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cbu_ecc_status_r() + offset,
424 gr_pri_gpc0_tpc0_sm_cbu_ecc_status_reset_task_f());
425
426 return 0;
427
428}
429
430static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
431 bool *post_event, struct channel_gk20a *fault_ch,
432 u32 *hww_global_esr)
433{
434 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
435 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
436 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
437 u32 l1_data_ecc_status, l1_data_ecc_corrected_err_status = 0;
438 u32 l1_data_ecc_uncorrected_err_status = 0;
439 u32 l1_data_corrected_err_count_delta = 0;
440 u32 l1_data_uncorrected_err_count_delta = 0;
441 bool is_l1_data_ecc_corrected_total_err_overflow = 0;
442 bool is_l1_data_ecc_uncorrected_total_err_overflow = 0;
443
444 /* Check for L1 data ECC errors. */
445 l1_data_ecc_status = gk20a_readl(g,
446 gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_r() + offset);
447 l1_data_ecc_corrected_err_status = l1_data_ecc_status &
448 (gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_corrected_err_el1_0_m() |
449 gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_corrected_err_el1_1_m());
450 l1_data_ecc_uncorrected_err_status = l1_data_ecc_status &
451 (gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_uncorrected_err_el1_0_m() |
452 gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_uncorrected_err_el1_1_m());
453
454 if ((l1_data_ecc_corrected_err_status == 0) && (l1_data_ecc_uncorrected_err_status == 0))
455 return 0;
456
457 l1_data_corrected_err_count_delta =
458 gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_v(
459 gk20a_readl(g,
460 gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() +
461 offset));
462 l1_data_uncorrected_err_count_delta =
463 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_v(
464 gk20a_readl(g,
465 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() +
466 offset));
467 is_l1_data_ecc_corrected_total_err_overflow =
468 gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_corrected_err_total_counter_overflow_v(l1_data_ecc_status);
469 is_l1_data_ecc_uncorrected_total_err_overflow =
470 gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_uncorrected_err_total_counter_overflow_v(l1_data_ecc_status);
471
472 if ((l1_data_corrected_err_count_delta > 0) || is_l1_data_ecc_corrected_total_err_overflow) {
473 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
474 "corrected error (SBE) detected in SM L1 data! err_mask [%08x] is_overf [%d]",
475 l1_data_ecc_corrected_err_status, is_l1_data_ecc_corrected_total_err_overflow);
476
 477		/* HW uses 16-bit counter */
478 l1_data_corrected_err_count_delta +=
479 (is_l1_data_ecc_corrected_total_err_overflow <<
480 gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s());
481 g->ecc.gr.t19x.sm_l1_data_corrected_err_count.counters[tpc] +=
482 l1_data_corrected_err_count_delta;
483 gk20a_writel(g,
484 gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset,
485 0);
486 }
487 if ((l1_data_uncorrected_err_count_delta > 0) || is_l1_data_ecc_uncorrected_total_err_overflow) {
488 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
489 "Uncorrected error (DBE) detected in SM L1 data! err_mask [%08x] is_overf [%d]",
490 l1_data_ecc_uncorrected_err_status, is_l1_data_ecc_uncorrected_total_err_overflow);
491
 492		/* HW uses 16-bit counter */
493 l1_data_uncorrected_err_count_delta +=
494 (is_l1_data_ecc_uncorrected_total_err_overflow <<
495 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s());
496 g->ecc.gr.t19x.sm_l1_data_uncorrected_err_count.counters[tpc] +=
497 l1_data_uncorrected_err_count_delta;
498 gk20a_writel(g,
499 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset,
500 0);
501 }
502
503 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_r() + offset,
504 gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_reset_task_f());
505
506 return 0;
507
508}
509
510static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
511 bool *post_event, struct channel_gk20a *fault_ch,
512 u32 *hww_global_esr)
513{
514 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
515 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
516 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
517 u32 icache_ecc_status, icache_ecc_corrected_err_status = 0;
518 u32 icache_ecc_uncorrected_err_status = 0;
519 u32 icache_corrected_err_count_delta = 0;
520 u32 icache_uncorrected_err_count_delta = 0;
521 bool is_icache_ecc_corrected_total_err_overflow = 0;
522 bool is_icache_ecc_uncorrected_total_err_overflow = 0;
523
524 /* Check for L0 && L1 icache ECC errors. */
525 icache_ecc_status = gk20a_readl(g,
526 gr_pri_gpc0_tpc0_sm_icache_ecc_status_r() + offset);
527 icache_ecc_corrected_err_status = icache_ecc_status &
528 (gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_data_m() |
529 gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_predecode_m() |
530 gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_data_m() |
531 gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_predecode_m());
532 icache_ecc_uncorrected_err_status = icache_ecc_status &
533 (gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_data_m() |
534 gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_predecode_m() |
535 gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_data_m() |
536 gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_predecode_m());
537
538 if ((icache_ecc_corrected_err_status == 0) && (icache_ecc_uncorrected_err_status == 0))
539 return 0;
540
541 icache_corrected_err_count_delta =
542 gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_v(
543 gk20a_readl(g,
544 gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() +
545 offset));
546 icache_uncorrected_err_count_delta =
547 gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_v(
548 gk20a_readl(g,
549 gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() +
550 offset));
551 is_icache_ecc_corrected_total_err_overflow =
552 gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_total_counter_overflow_v(icache_ecc_status);
553 is_icache_ecc_uncorrected_total_err_overflow =
554 gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_total_counter_overflow_v(icache_ecc_status);
555
556 if ((icache_corrected_err_count_delta > 0) || is_icache_ecc_corrected_total_err_overflow) {
557 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
558 "corrected error (SBE) detected in SM L0 && L1 icache! err_mask [%08x] is_overf [%d]",
559 icache_ecc_corrected_err_status, is_icache_ecc_corrected_total_err_overflow);
560
 561		/* HW uses 16-bit counter */
562 icache_corrected_err_count_delta +=
563 (is_icache_ecc_corrected_total_err_overflow <<
564 gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s());
565 g->ecc.gr.t19x.sm_icache_corrected_err_count.counters[tpc] +=
566 icache_corrected_err_count_delta;
567 gk20a_writel(g,
568 gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset,
569 0);
570 }
571 if ((icache_uncorrected_err_count_delta > 0) || is_icache_ecc_uncorrected_total_err_overflow) {
572 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
573 "Uncorrected error (DBE) detected in SM L0 && L1 icache! err_mask [%08x] is_overf [%d]",
574 icache_ecc_uncorrected_err_status, is_icache_ecc_uncorrected_total_err_overflow);
575
 576		/* HW uses 16-bit counter */
577 icache_uncorrected_err_count_delta +=
578 (is_icache_ecc_uncorrected_total_err_overflow <<
579 gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s());
580 g->ecc.gr.t19x.sm_icache_uncorrected_err_count.counters[tpc] +=
581 icache_uncorrected_err_count_delta;
582 gk20a_writel(g,
583 gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset,
584 0);
585 }
586
587 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_icache_ecc_status_r() + offset,
588 gr_pri_gpc0_tpc0_sm_icache_ecc_status_reset_task_f());
589
590 return 0;
591
592}
593
594int gr_gv11b_handle_tpc_sm_ecc_exception(struct gk20a *g,
595 u32 gpc, u32 tpc,
596 bool *post_event, struct channel_gk20a *fault_ch,
597 u32 *hww_global_esr)
598{
599 int ret = 0;
600
601 /* Check for L1 tag ECC errors. */
602 gr_gv11b_handle_l1_tag_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr);
603
604 /* Check for LRF ECC errors. */
605 gr_gv11b_handle_lrf_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr);
606
607 /* Check for CBU ECC errors. */
608 gr_gv11b_handle_cbu_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr);
609
610 /* Check for L1 data ECC errors. */
611 gr_gv11b_handle_l1_data_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr);
612
613 /* Check for L0 && L1 icache ECC errors. */
614 gr_gv11b_handle_icache_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr);
615
616 return ret;
617}
618
619int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
620 bool *post_event, struct channel_gk20a *fault_ch,
621 u32 *hww_global_esr)
622{
623 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
624 u32 offset = gpc_stride * gpc;
625 u32 gcc_l15_ecc_status, gcc_l15_ecc_corrected_err_status = 0;
626 u32 gcc_l15_ecc_uncorrected_err_status = 0;
627 u32 gcc_l15_corrected_err_count_delta = 0;
628 u32 gcc_l15_uncorrected_err_count_delta = 0;
629 bool is_gcc_l15_ecc_corrected_total_err_overflow = 0;
630 bool is_gcc_l15_ecc_uncorrected_total_err_overflow = 0;
631
632 /* Check for gcc l15 ECC errors. */
633 gcc_l15_ecc_status = gk20a_readl(g,
634 gr_pri_gpc0_gcc_l15_ecc_status_r() + offset);
635 gcc_l15_ecc_corrected_err_status = gcc_l15_ecc_status &
636 (gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank0_m() |
637 gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank1_m());
638 gcc_l15_ecc_uncorrected_err_status = gcc_l15_ecc_status &
639 (gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank0_m() |
640 gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank1_m());
641
642 if ((gcc_l15_ecc_corrected_err_status == 0) && (gcc_l15_ecc_uncorrected_err_status == 0))
643 return 0;
644
645 gcc_l15_corrected_err_count_delta =
646 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_v(
647 gk20a_readl(g,
648 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() +
649 offset));
650 gcc_l15_uncorrected_err_count_delta =
651 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_v(
652 gk20a_readl(g,
653 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() +
654 offset));
655 is_gcc_l15_ecc_corrected_total_err_overflow =
656 gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_total_counter_overflow_v(gcc_l15_ecc_status);
657 is_gcc_l15_ecc_uncorrected_total_err_overflow =
658 gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_total_counter_overflow_v(gcc_l15_ecc_status);
659
660 if ((gcc_l15_corrected_err_count_delta > 0) || is_gcc_l15_ecc_corrected_total_err_overflow) {
661 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
662 "corrected error (SBE) detected in GCC L1.5! err_mask [%08x] is_overf [%d]",
663 gcc_l15_ecc_corrected_err_status, is_gcc_l15_ecc_corrected_total_err_overflow);
664
 665		/* HW uses 16-bit counter */
666 gcc_l15_corrected_err_count_delta +=
667 (is_gcc_l15_ecc_corrected_total_err_overflow <<
668 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s());
669 g->ecc.gr.t19x.gcc_l15_corrected_err_count.counters[gpc] +=
670 gcc_l15_corrected_err_count_delta;
671 gk20a_writel(g,
672 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset,
673 0);
674 }
675 if ((gcc_l15_uncorrected_err_count_delta > 0) || is_gcc_l15_ecc_uncorrected_total_err_overflow) {
676 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
677 "Uncorrected error (DBE) detected in GCC L1.5! err_mask [%08x] is_overf [%d]",
678 gcc_l15_ecc_uncorrected_err_status, is_gcc_l15_ecc_uncorrected_total_err_overflow);
679
 680		/* HW uses 16-bit counter */
681 gcc_l15_uncorrected_err_count_delta +=
682 (is_gcc_l15_ecc_uncorrected_total_err_overflow <<
683 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s());
684 g->ecc.gr.t19x.gcc_l15_uncorrected_err_count.counters[gpc] +=
685 gcc_l15_uncorrected_err_count_delta;
686 gk20a_writel(g,
687 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset,
688 0);
689 }
690
691 gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_status_r() + offset,
692 gr_pri_gpc0_gcc_l15_ecc_status_reset_task_f());
693
694 return 0;
695}
696
697static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc,
698 u32 exception)
699{
700 int ret = 0;
701 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
702 u32 offset = gpc_stride * gpc;
703 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
704 u32 corrected_delta, uncorrected_delta;
705 u32 corrected_overflow, uncorrected_overflow;
706 int hww_esr;
707
708 hww_esr = gk20a_readl(g, gr_gpc0_mmu_gpcmmu_global_esr_r() + offset);
709
710 if (!(hww_esr & (gr_gpc0_mmu_gpcmmu_global_esr_ecc_corrected_m() |
711 gr_gpc0_mmu_gpcmmu_global_esr_ecc_uncorrected_m())))
712 return ret;
713
714 ecc_status = gk20a_readl(g,
715 gr_gpc0_mmu_l1tlb_ecc_status_r() + offset);
716 ecc_addr = gk20a_readl(g,
717 gr_gpc0_mmu_l1tlb_ecc_address_r() + offset);
718 corrected_cnt = gk20a_readl(g,
719 gr_gpc0_mmu_l1tlb_ecc_corrected_err_count_r() + offset);
720 uncorrected_cnt = gk20a_readl(g,
721 gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_r() + offset);
722
723 corrected_delta = gr_gpc0_mmu_l1tlb_ecc_corrected_err_count_total_v(
724 corrected_cnt);
725 uncorrected_delta = gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_v(
726 uncorrected_cnt);
727 corrected_overflow = ecc_status &
728 gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_total_counter_overflow_m();
729
730 uncorrected_overflow = ecc_status &
731 gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_total_counter_overflow_m();
732
733
734 /* clear the interrupt */
735 if ((corrected_delta > 0) || corrected_overflow)
736 gk20a_writel(g,
737 gr_gpc0_mmu_l1tlb_ecc_corrected_err_count_r() +
738 offset, 0);
739 if ((uncorrected_delta > 0) || uncorrected_overflow)
740 gk20a_writel(g,
741 gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_r() +
742 offset, 0);
743
744 gk20a_writel(g, gr_gpc0_mmu_l1tlb_ecc_status_r() + offset,
745 gr_gpc0_mmu_l1tlb_ecc_status_reset_task_f());
746
747 /* Handle overflow */
748 if (corrected_overflow)
749 corrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_corrected_err_count_total_s());
750 if (uncorrected_overflow)
751 uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s());
752
753
754 g->ecc.gr.t19x.mmu_l1tlb_corrected_err_count.counters[gpc] +=
755 corrected_delta;
756 g->ecc.gr.t19x.mmu_l1tlb_uncorrected_err_count.counters[gpc] +=
757 uncorrected_delta;
758 nvgpu_log(g, gpu_dbg_intr,
759 "mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
760
761 if (ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_sa_data_m())
762 nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error");
763 if (ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_sa_data_m())
764 nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error");
765 if (ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_fa_data_m())
766 nvgpu_log(g, gpu_dbg_intr, "corrected ecc fa data error");
767 if (ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_fa_data_m())
768 nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc fa data error");
769 if (corrected_overflow || uncorrected_overflow)
770 nvgpu_info(g, "mmu l1tlb ecc counter overflow!");
771
772 nvgpu_log(g, gpu_dbg_intr,
773 "ecc error address: 0x%x", ecc_addr);
774 nvgpu_log(g, gpu_dbg_intr,
775 "ecc error count corrected: %d, uncorrected %d",
776 g->ecc.gr.t19x.mmu_l1tlb_corrected_err_count.counters[gpc],
777 g->ecc.gr.t19x.mmu_l1tlb_uncorrected_err_count.counters[gpc]);
778
779 return ret;
780}
781
782static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc,
783 u32 exception)
784{
785 int ret = 0;
786 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
787 u32 offset = gpc_stride * gpc;
788 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
789 u32 corrected_delta, uncorrected_delta;
790 u32 corrected_overflow, uncorrected_overflow;
791 int hww_esr;
792
793 hww_esr = gk20a_readl(g, gr_gpc0_gpccs_hww_esr_r() + offset);
794
795 if (!(hww_esr & (gr_gpc0_gpccs_hww_esr_ecc_uncorrected_m() |
796 gr_gpc0_gpccs_hww_esr_ecc_corrected_m())))
797 return ret;
798
799 ecc_status = gk20a_readl(g,
800 gr_gpc0_gpccs_falcon_ecc_status_r() + offset);
801 ecc_addr = gk20a_readl(g,
802 gr_gpc0_gpccs_falcon_ecc_address_r() + offset);
803 corrected_cnt = gk20a_readl(g,
804 gr_gpc0_gpccs_falcon_ecc_corrected_err_count_r() + offset);
805 uncorrected_cnt = gk20a_readl(g,
806 gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_r() + offset);
807
808 corrected_delta = gr_gpc0_gpccs_falcon_ecc_corrected_err_count_total_v(
809 corrected_cnt);
810 uncorrected_delta = gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_total_v(
811 uncorrected_cnt);
812 corrected_overflow = ecc_status &
813 gr_gpc0_gpccs_falcon_ecc_status_corrected_err_total_counter_overflow_m();
814
815 uncorrected_overflow = ecc_status &
816 gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_total_counter_overflow_m();
817
818
819 /* clear the interrupt */
820 if ((corrected_delta > 0) || corrected_overflow)
821 gk20a_writel(g,
822 gr_gpc0_gpccs_falcon_ecc_corrected_err_count_r() +
823 offset, 0);
824 if ((uncorrected_delta > 0) || uncorrected_overflow)
825 gk20a_writel(g,
826 gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_r() +
827 offset, 0);
828
829 gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset,
830 gr_gpc0_gpccs_falcon_ecc_status_reset_task_f());
831
832 g->ecc.gr.t19x.gpccs_corrected_err_count.counters[gpc] +=
833 corrected_delta;
834 g->ecc.gr.t19x.gpccs_uncorrected_err_count.counters[gpc] +=
835 uncorrected_delta;
836 nvgpu_log(g, gpu_dbg_intr,
 837			"gpccs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
838
839 if (ecc_status & gr_gpc0_gpccs_falcon_ecc_status_corrected_err_imem_m())
840 nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected");
841 if (ecc_status &
842 gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_imem_m())
843 nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected");
844 if (ecc_status &
845 gr_gpc0_gpccs_falcon_ecc_status_corrected_err_dmem_m())
846 nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected");
847 if (ecc_status &
848 gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m())
849 nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected");
850 if (corrected_overflow || uncorrected_overflow)
851 nvgpu_info(g, "gpccs ecc counter overflow!");
852
853 nvgpu_log(g, gpu_dbg_intr,
854 "ecc error row address: 0x%x",
855 gr_gpc0_gpccs_falcon_ecc_address_row_address_v(ecc_addr));
856
857 nvgpu_log(g, gpu_dbg_intr,
858 "ecc error count corrected: %d, uncorrected %d",
859 g->ecc.gr.t19x.gpccs_corrected_err_count.counters[gpc],
860 g->ecc.gr.t19x.gpccs_uncorrected_err_count.counters[gpc]);
861
862 return ret;
863}
864
865int gr_gv11b_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc,
866 u32 gpc_exception)
867{
868 if (gpc_exception & gr_gpc0_gpccs_gpc_exception_gpcmmu_m())
869 return gr_gv11b_handle_gpcmmu_ecc_exception(g, gpc,
870 gpc_exception);
871 return 0;
872}
873
874int gr_gv11b_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc,
875 u32 gpc_exception)
876{
877 if (gpc_exception & gr_gpc0_gpccs_gpc_exception_gpccs_m())
878 return gr_gv11b_handle_gpccs_ecc_exception(g, gpc,
879 gpc_exception);
880
881 return 0;
882}
883
884void gr_gv11b_enable_gpc_exceptions(struct gk20a *g)
885{
886 struct gr_gk20a *gr = &g->gr;
887 u32 tpc_mask;
888
889 gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
890 gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f() |
891 gr_gpcs_tpcs_tpccs_tpc_exception_en_mpc_enabled_f());
892
893 tpc_mask =
894 gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1);
895
896 gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(),
897 (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) |
898 gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1) |
899 gr_gpcs_gpccs_gpc_exception_en_gpcmmu_f(1)));
900}
901
902int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
903 bool *post_event)
904{
905 return 0;
906}
907
908int gr_gv11b_zbc_s_query_table(struct gk20a *g, struct gr_gk20a *gr,
909 struct zbc_query_params *query_params)
910{
911 u32 index = query_params->index_size;
912
913 if (index >= GK20A_ZBC_TABLE_SIZE) {
914 nvgpu_err(g, "invalid zbc stencil table index");
915 return -EINVAL;
916 }
917 query_params->depth = gr->zbc_s_tbl[index].stencil;
918 query_params->format = gr->zbc_s_tbl[index].format;
919 query_params->ref_cnt = gr->zbc_s_tbl[index].ref_cnt;
920
921 return 0;
922}
923
924bool gr_gv11b_add_zbc_type_s(struct gk20a *g, struct gr_gk20a *gr,
925 struct zbc_entry *zbc_val, int *ret_val)
926{
927 struct zbc_s_table *s_tbl;
928 u32 i;
929 bool added = false;
930
931 *ret_val = -ENOMEM;
932
933 /* search existing tables */
934 for (i = 0; i < gr->max_used_s_index; i++) {
935
936 s_tbl = &gr->zbc_s_tbl[i];
937
938 if (s_tbl->ref_cnt &&
939 s_tbl->stencil == zbc_val->depth &&
940 s_tbl->format == zbc_val->format) {
941 added = true;
942 s_tbl->ref_cnt++;
943 *ret_val = 0;
944 break;
945 }
946 }
947 /* add new table */
948 if (!added &&
949 gr->max_used_s_index < GK20A_ZBC_TABLE_SIZE) {
950
951 s_tbl = &gr->zbc_s_tbl[gr->max_used_s_index];
952 WARN_ON(s_tbl->ref_cnt != 0);
953
954 *ret_val = g->ops.gr.add_zbc_s(g, gr,
955 zbc_val, gr->max_used_s_index);
956
957 if (!(*ret_val))
958 gr->max_used_s_index++;
959 }
960 return added;
961}
962
963int gr_gv11b_add_zbc_stencil(struct gk20a *g, struct gr_gk20a *gr,
964 struct zbc_entry *stencil_val, u32 index)
965{
966 u32 zbc_s;
967
968 /* update l2 table */
969 g->ops.ltc.set_zbc_s_entry(g, stencil_val, index);
970
971 /* update local copy */
972 gr->zbc_s_tbl[index].stencil = stencil_val->depth;
973 gr->zbc_s_tbl[index].format = stencil_val->format;
974 gr->zbc_s_tbl[index].ref_cnt++;
975
976 gk20a_writel(g, gr_gpcs_swdx_dss_zbc_s_r(index), stencil_val->depth);
977 zbc_s = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_s_01_to_04_format_r() +
978 (index & ~3));
979 zbc_s &= ~(0x7f << (index % 4) * 7);
980 zbc_s |= stencil_val->format << (index % 4) * 7;
981 gk20a_writel(g, gr_gpcs_swdx_dss_zbc_s_01_to_04_format_r() +
982 (index & ~3), zbc_s);
983
984 return 0;
985}
986
987int gr_gv11b_load_stencil_default_tbl(struct gk20a *g,
988 struct gr_gk20a *gr)
989{
990 struct zbc_entry zbc_val;
991 u32 err;
992
993 /* load default stencil table */
994 zbc_val.type = GV11B_ZBC_TYPE_STENCIL;
995
996 zbc_val.depth = 0x0;
997 zbc_val.format = ZBC_STENCIL_CLEAR_FMT_U8;
998 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
999
1000 zbc_val.depth = 0x1;
1001 zbc_val.format = ZBC_STENCIL_CLEAR_FMT_U8;
1002 err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
1003
1004 zbc_val.depth = 0xff;
1005 zbc_val.format = ZBC_STENCIL_CLEAR_FMT_U8;
1006 err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
1007
1008 if (!err) {
1009 gr->max_default_s_index = 3;
1010 } else {
1011 nvgpu_err(g, "fail to load default zbc stencil table");
1012 return err;
1013 }
1014
1015 return 0;
1016}
1017
1018int gr_gv11b_load_stencil_tbl(struct gk20a *g, struct gr_gk20a *gr)
1019{
1020 int ret;
1021 u32 i;
1022
1023 for (i = 0; i < gr->max_used_s_index; i++) {
1024 struct zbc_s_table *s_tbl = &gr->zbc_s_tbl[i];
1025 struct zbc_entry zbc_val;
1026
1027 zbc_val.type = GV11B_ZBC_TYPE_STENCIL;
1028 zbc_val.depth = s_tbl->stencil;
1029 zbc_val.format = s_tbl->format;
1030
1031 ret = g->ops.gr.add_zbc_s(g, gr, &zbc_val, i);
1032 if (ret)
1033 return ret;
1034 }
1035 return 0;
1036}
1037
1038u32 gr_gv11b_pagepool_default_size(struct gk20a *g)
1039{
1040 return gr_scc_pagepool_total_pages_hwmax_value_v();
1041}
1042
1043int gr_gv11b_calc_global_ctx_buffer_size(struct gk20a *g)
1044{
1045 struct gr_gk20a *gr = &g->gr;
1046 int size;
1047
1048 gr->attrib_cb_size = gr->attrib_cb_default_size;
1049 gr->alpha_cb_size = gr->alpha_cb_default_size;
1050
1051 gr->attrib_cb_size = min(gr->attrib_cb_size,
1052 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~0) / g->gr.tpc_count);
1053 gr->alpha_cb_size = min(gr->alpha_cb_size,
1054 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~0) / g->gr.tpc_count);
1055
1056 size = gr->attrib_cb_size *
1057 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
1058 gr->max_tpc_count;
1059
1060 size += gr->alpha_cb_size *
1061 gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
1062 gr->max_tpc_count;
1063
1064 size = ALIGN(size, 128);
1065
1066 return size;
1067}
1068
1069static void gr_gv11b_set_go_idle_timeout(struct gk20a *g, u32 data)
1070{
1071 gk20a_writel(g, gr_fe_go_idle_timeout_r(), data);
1072}
1073
1074static void gr_gv11b_set_coalesce_buffer_size(struct gk20a *g, u32 data)
1075{
1076 u32 val;
1077
1078 gk20a_dbg_fn("");
1079
1080 val = gk20a_readl(g, gr_gpcs_tc_debug0_r());
1081 val = set_field(val, gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(),
1082 gr_gpcs_tc_debug0_limit_coalesce_buffer_size_f(data));
1083 gk20a_writel(g, gr_gpcs_tc_debug0_r(), val);
1084
1085 gk20a_dbg_fn("done");
1086}
1087
1088static void gr_gv11b_set_tex_in_dbg(struct gk20a *g, u32 data)
1089{
1090 u32 val;
1091 bool flag;
1092
1093 gk20a_dbg_fn("");
1094
1095 val = gk20a_readl(g, gr_gpcs_tpcs_tex_in_dbg_r());
1096 flag = (data & NVC397_SET_TEX_IN_DBG_TSL1_RVCH_INVALIDATE) ? 1 : 0;
1097 val = set_field(val, gr_gpcs_tpcs_tex_in_dbg_tsl1_rvch_invalidate_m(),
1098 gr_gpcs_tpcs_tex_in_dbg_tsl1_rvch_invalidate_f(flag));
1099 gk20a_writel(g, gr_gpcs_tpcs_tex_in_dbg_r(), val);
1100
1101 val = gk20a_readl(g, gr_gpcs_tpcs_sm_l1tag_ctrl_r());
1102 flag = (data &
1103 NVC397_SET_TEX_IN_DBG_SM_L1TAG_CTRL_CACHE_SURFACE_LD) ? 1 : 0;
1104 val = set_field(val, gr_gpcs_tpcs_sm_l1tag_ctrl_cache_surface_ld_m(),
1105 gr_gpcs_tpcs_sm_l1tag_ctrl_cache_surface_ld_f(flag));
1106 flag = (data &
1107 NVC397_SET_TEX_IN_DBG_SM_L1TAG_CTRL_CACHE_SURFACE_ST) ? 1 : 0;
1108 val = set_field(val, gr_gpcs_tpcs_sm_l1tag_ctrl_cache_surface_st_m(),
1109 gr_gpcs_tpcs_sm_l1tag_ctrl_cache_surface_st_f(flag));
1110 gk20a_writel(g, gr_gpcs_tpcs_sm_l1tag_ctrl_r(), val);
1111}
1112
1113static void gr_gv11b_set_skedcheck(struct gk20a *g, u32 data)
1114{
1115 u32 reg_val;
1116
1117 reg_val = gk20a_readl(g, gr_sked_hww_esr_en_r());
1118
1119 if ((data & NVC397_SET_SKEDCHECK_18_MASK) ==
1120 NVC397_SET_SKEDCHECK_18_DISABLE) {
1121 reg_val = set_field(reg_val,
1122 gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_m(),
1123 gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_disabled_f()
1124 );
1125 } else if ((data & NVC397_SET_SKEDCHECK_18_MASK) ==
1126 NVC397_SET_SKEDCHECK_18_ENABLE) {
1127 reg_val = set_field(reg_val,
1128 gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_m(),
1129 gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_enabled_f()
1130 );
1131 }
1132 nvgpu_log_info(g, "sked_hww_esr_en = 0x%x", reg_val);
1133 gk20a_writel(g, gr_sked_hww_esr_en_r(), reg_val);
1134
1135}
1136
1137static void gv11b_gr_set_shader_exceptions(struct gk20a *g, u32 data)
1138{
1139 gk20a_dbg_fn("");
1140
1141 if (data == NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE) {
1142 gk20a_writel(g, gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r(),
1143 0);
1144 gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r(),
1145 0);
1146 } else {
1147 g->ops.gr.set_hww_esr_report_mask(g);
1148 }
1149}
1150
1151int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr,
1152 u32 class_num, u32 offset, u32 data)
1153{
1154 gk20a_dbg_fn("");
1155
1156 if (class_num == VOLTA_COMPUTE_A) {
1157 switch (offset << 2) {
1158 case NVC0C0_SET_SHADER_EXCEPTIONS:
1159 gv11b_gr_set_shader_exceptions(g, data);
1160 break;
1161 case NVC3C0_SET_SKEDCHECK:
1162 gr_gv11b_set_skedcheck(g, data);
1163 break;
1164 default:
1165 goto fail;
1166 }
1167 }
1168
1169 if (class_num == VOLTA_A) {
1170 switch (offset << 2) {
1171 case NVC397_SET_SHADER_EXCEPTIONS:
1172 gv11b_gr_set_shader_exceptions(g, data);
1173 break;
1174 case NVC397_SET_CIRCULAR_BUFFER_SIZE:
1175 g->ops.gr.set_circular_buffer_size(g, data);
1176 break;
1177 case NVC397_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
1178 g->ops.gr.set_alpha_circular_buffer_size(g, data);
1179 break;
1180 case NVC397_SET_GO_IDLE_TIMEOUT:
1181 gr_gv11b_set_go_idle_timeout(g, data);
1182 break;
1183 case NVC097_SET_COALESCE_BUFFER_SIZE:
1184 gr_gv11b_set_coalesce_buffer_size(g, data);
1185 break;
1186 case NVC397_SET_TEX_IN_DBG:
1187 gr_gv11b_set_tex_in_dbg(g, data);
1188 break;
1189 case NVC397_SET_SKEDCHECK:
1190 gr_gv11b_set_skedcheck(g, data);
1191 break;
1192 case NVC397_SET_BES_CROP_DEBUG3:
1193 g->ops.gr.set_bes_crop_debug3(g, data);
1194 break;
1195 default:
1196 goto fail;
1197 }
1198 }
1199 return 0;
1200
1201fail:
1202 return -EINVAL;
1203}
1204
1205void gr_gv11b_bundle_cb_defaults(struct gk20a *g)
1206{
1207 struct gr_gk20a *gr = &g->gr;
1208
1209 gr->bundle_cb_default_size =
1210 gr_scc_bundle_cb_size_div_256b__prod_v();
1211 gr->min_gpm_fifo_depth =
1212 gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
1213 gr->bundle_cb_token_limit =
1214 gr_pd_ab_dist_cfg2_token_limit_init_v();
1215}
1216
1217void gr_gv11b_cb_size_default(struct gk20a *g)
1218{
1219 struct gr_gk20a *gr = &g->gr;
1220
1221 if (!gr->attrib_cb_default_size)
1222 gr->attrib_cb_default_size =
1223 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
1224 gr->alpha_cb_default_size =
1225 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
1226}
1227
1228void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
1229{
1230 struct gr_gk20a *gr = &g->gr;
1231 u32 gpc_index, ppc_index, stride, val;
1232 u32 pd_ab_max_output;
1233 u32 alpha_cb_size = data * 4;
1234
1235 gk20a_dbg_fn("");
1236
1237 if (alpha_cb_size > gr->alpha_cb_size)
1238 alpha_cb_size = gr->alpha_cb_size;
1239
1240 gk20a_writel(g, gr_ds_tga_constraintlogic_alpha_r(),
1241 (gk20a_readl(g, gr_ds_tga_constraintlogic_alpha_r()) &
1242 ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
1243 gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));
1244
1245 pd_ab_max_output = alpha_cb_size *
1246 gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() /
1247 gr_pd_ab_dist_cfg1_max_output_granularity_v();
1248
1249 gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
1250 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
1251 gr_pd_ab_dist_cfg1_max_batches_init_f());
1252
1253 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
1254 stride = proj_gpc_stride_v() * gpc_index;
1255
1256 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
1257 ppc_index++) {
1258
1259 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
1260 stride +
1261 proj_ppc_in_gpc_stride_v() * ppc_index);
1262
1263 val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
1264 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
1265 gr->pes_tpc_count[ppc_index][gpc_index]));
1266
1267 gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
1268 stride +
1269 proj_ppc_in_gpc_stride_v() * ppc_index, val);
1270 }
1271 }
1272}
1273
1274void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data)
1275{
1276 struct gr_gk20a *gr = &g->gr;
1277 u32 gpc_index, ppc_index, stride, val;
1278 u32 cb_size_steady = data * 4, cb_size;
1279
1280 gk20a_dbg_fn("");
1281
1282 if (cb_size_steady > gr->attrib_cb_size)
1283 cb_size_steady = gr->attrib_cb_size;
1284 if (gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) !=
1285 gk20a_readl(g,
1286 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r())) {
1287 cb_size = cb_size_steady +
1288 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
1289 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
1290 } else {
1291 cb_size = cb_size_steady;
1292 }
1293
1294 gk20a_writel(g, gr_ds_tga_constraintlogic_beta_r(),
1295 (gk20a_readl(g, gr_ds_tga_constraintlogic_beta_r()) &
1296 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
1297 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady));
1298
1299 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
1300 stride = proj_gpc_stride_v() * gpc_index;
1301
1302 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
1303 ppc_index++) {
1304
1305 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
1306 stride +
1307 proj_ppc_in_gpc_stride_v() * ppc_index);
1308
1309 val = set_field(val,
1310 gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
1311 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
1312 gr->pes_tpc_count[ppc_index][gpc_index]));
1313
1314 gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
1315 stride +
1316 proj_ppc_in_gpc_stride_v() * ppc_index, val);
1317
1318 gk20a_writel(g, proj_ppc_in_gpc_stride_v() * ppc_index +
1319 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() +
1320 stride,
1321 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_f(
1322 cb_size_steady));
1323
1324 val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
1325 ppc_index + gpc_index));
1326
1327 val = set_field(val,
1328 gr_gpcs_swdx_tc_beta_cb_size_v_m(),
1329 gr_gpcs_swdx_tc_beta_cb_size_v_f(
1330 cb_size_steady *
1331 gr->gpc_ppc_count[gpc_index]));
1332
1333 gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
1334 ppc_index + gpc_index), val);
1335 }
1336 }
1337}
1338
1339int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size,
1340 struct nvgpu_mem *mem)
1341{
1342 int err;
1343
1344 gk20a_dbg_fn("");
1345
1346 err = nvgpu_dma_alloc_sys(vm->mm->g, size, mem);
1347 if (err)
1348 return err;
1349
1350 mem->gpu_va = nvgpu_gmmu_map(vm,
1351 mem,
1352 size,
1353 NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE,
1354 gk20a_mem_flag_none,
1355 false,
1356 mem->aperture);
1357
1358 if (!mem->gpu_va) {
1359 err = -ENOMEM;
1360 goto fail_free;
1361 }
1362
1363 return 0;
1364
1365fail_free:
1366 nvgpu_dma_free(vm->mm->g, mem);
1367 return err;
1368}
1369
1370static void gr_gv11b_dump_gr_per_sm_regs(struct gk20a *g,
1371 struct gk20a_debug_output *o,
1372 u32 gpc, u32 tpc, u32 sm, u32 offset)
1373{
1374
1375 gk20a_debug_output(o,
1376 "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_WARP_ESR: 0x%x\n",
1377 gpc, tpc, sm, gk20a_readl(g,
1378 gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset));
1379
1380 gk20a_debug_output(o,
1381 "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_WARP_ESR_REPORT_MASK: 0x%x\n",
1382 gpc, tpc, sm, gk20a_readl(g,
1383 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset));
1384
1385 gk20a_debug_output(o,
1386 "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_GLOBAL_ESR: 0x%x\n",
1387 gpc, tpc, sm, gk20a_readl(g,
1388 gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset));
1389
1390 gk20a_debug_output(o,
1391 "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_GLOBAL_ESR_REPORT_MASK: 0x%x\n",
1392 gpc, tpc, sm, gk20a_readl(g,
1393 gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r() + offset));
1394
1395 gk20a_debug_output(o,
1396 "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_DBGR_CONTROL0: 0x%x\n",
1397 gpc, tpc, sm, gk20a_readl(g,
1398 gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset));
1399
1400 gk20a_debug_output(o,
1401 "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_DBGR_STATUS0: 0x%x\n",
1402 gpc, tpc, sm, gk20a_readl(g,
1403 gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset));
1404}
1405
1406static int gr_gv11b_dump_gr_sm_regs(struct gk20a *g,
1407 struct gk20a_debug_output *o)
1408{
1409 u32 gpc, tpc, sm, sm_per_tpc;
1410 u32 gpc_offset, tpc_offset, offset;
1411
1412 gk20a_debug_output(o,
1413 "NV_PGRAPH_PRI_GPCS_TPCS_SMS_HWW_GLOBAL_ESR_REPORT_MASK: 0x%x\n",
1414 gk20a_readl(g,
1415 gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r()));
1416 gk20a_debug_output(o,
1417 "NV_PGRAPH_PRI_GPCS_TPCS_SMS_HWW_WARP_ESR_REPORT_MASK: 0x%x\n",
1418 gk20a_readl(g, gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r()));
1419 gk20a_debug_output(o,
1420 "NV_PGRAPH_PRI_GPCS_TPCS_SMS_HWW_GLOBAL_ESR: 0x%x\n",
1421 gk20a_readl(g, gr_gpcs_tpcs_sms_hww_global_esr_r()));
1422 gk20a_debug_output(o,
1423 "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_CONTROL0: 0x%x\n",
1424 gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_control0_r()));
1425 gk20a_debug_output(o,
1426 "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_STATUS0: 0x%x\n",
1427 gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_status0_r()));
1428 gk20a_debug_output(o,
1429 "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_BPT_PAUSE_MASK_0: 0x%x\n",
1430 gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_0_r()));
1431 gk20a_debug_output(o,
1432 "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_BPT_PAUSE_MASK_1: 0x%x\n",
1433 gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_1_r()));
1434
1435 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
1436 for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
1437 gpc_offset = gk20a_gr_gpc_offset(g, gpc);
1438
1439 for (tpc = 0; tpc < g->gr.tpc_count; tpc++) {
1440 tpc_offset = gk20a_gr_tpc_offset(g, tpc);
1441
1442 for (sm = 0; sm < sm_per_tpc; sm++) {
1443 offset = gpc_offset + tpc_offset +
1444 gv11b_gr_sm_offset(g, sm);
1445
1446 gr_gv11b_dump_gr_per_sm_regs(g, o,
1447 gpc, tpc, sm, offset);
1448 }
1449 }
1450 }
1451
1452 return 0;
1453}
1454
1455int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
1456 struct gk20a_debug_output *o)
1457{
1458 struct gr_gk20a *gr = &g->gr;
1459 u32 gr_engine_id;
1460
1461 gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
1462
1463 gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n",
1464 gk20a_readl(g, gr_status_r()));
1465 gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n",
1466 gk20a_readl(g, gr_status_1_r()));
1467 gk20a_debug_output(o, "NV_PGRAPH_STATUS2: 0x%x\n",
1468 gk20a_readl(g, gr_status_2_r()));
1469 gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n",
1470 gk20a_readl(g, gr_engine_status_r()));
1471 gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n",
1472 gk20a_readl(g, gr_gpfifo_status_r()));
1473 gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n",
1474 gk20a_readl(g, gr_gpfifo_ctl_r()));
1475 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n",
1476 gk20a_readl(g, gr_fecs_host_int_status_r()));
1477 gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION : 0x%x\n",
1478 gk20a_readl(g, gr_exception_r()));
1479 gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n",
1480 gk20a_readl(g, gr_fecs_intr_r()));
1481 gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n",
1482 gk20a_readl(g, fifo_engine_status_r(gr_engine_id)));
1483 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n",
1484 gk20a_readl(g, gr_activity_0_r()));
1485 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n",
1486 gk20a_readl(g, gr_activity_1_r()));
1487 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY2: 0x%x\n",
1488 gk20a_readl(g, gr_activity_2_r()));
1489 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n",
1490 gk20a_readl(g, gr_activity_4_r()));
1491 gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n",
1492 gk20a_readl(g, gr_pri_sked_activity_r()));
1493 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n",
1494 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r()));
1495 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n",
1496 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r()));
1497 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n",
1498 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r()));
1499 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n",
1500 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
1501 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1502 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
1503 if (gr->gpc_tpc_count[0] == 2)
1504 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1505 gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
1506 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1507 gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r()));
1508 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n",
1509 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r()));
1510 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n",
1511 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r()));
1512 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n",
1513 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r()));
1514 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n",
1515 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
1516 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1517 gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
1518 if (gr->gpc_tpc_count[0] == 2)
1519 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1520 gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
1521 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
1522 gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r()));
1523 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n",
1524 gk20a_readl(g, gr_pri_be0_becs_be_activity0_r()));
1525 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n",
1526 gk20a_readl(g, gr_pri_be1_becs_be_activity0_r()));
1527 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n",
1528 gk20a_readl(g, gr_pri_bes_becs_be_activity0_r()));
1529 gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n",
1530 gk20a_readl(g, gr_pri_ds_mpipe_status_r()));
1531 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n",
1532 gk20a_readl(g, gr_fe_go_idle_timeout_r()));
1533 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n",
1534 gk20a_readl(g, gr_pri_fe_go_idle_info_r()));
1535 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n",
1536 gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r()));
1537 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n",
1538 gk20a_readl(g, gr_cwd_fs_r()));
1539 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS(0): 0x%x\n",
1540 gk20a_readl(g, gr_fe_tpc_fs_r(0)));
1541 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID: 0x%x\n",
1542 gk20a_readl(g, gr_cwd_gpc_tpc_id_r(0)));
1543 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n",
1544 gk20a_readl(g, gr_cwd_sm_id_r(0)));
1545 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n",
1546 gk20a_readl(g, gr_fecs_ctxsw_status_fe_0_r()));
1547 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n",
1548 gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
1549 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n",
1550 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r()));
1551 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n",
1552 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r()));
1553 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n",
1554 gk20a_readl(g, gr_fecs_ctxsw_idlestate_r()));
1555 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n",
1556 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r()));
1557 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n",
1558 gk20a_readl(g, gr_fecs_current_ctx_r()));
1559 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n",
1560 gk20a_readl(g, gr_fecs_new_ctx_r()));
1561 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_ENABLE : 0x%x\n",
1562 gk20a_readl(g, gr_fecs_host_int_enable_r()));
1563 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n",
1564 gk20a_readl(g, gr_fecs_host_int_status_r()));
1565 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n",
1566 gk20a_readl(g, gr_pri_be0_crop_status1_r()));
1567 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n",
1568 gk20a_readl(g, gr_pri_bes_crop_status1_r()));
1569 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS : 0x%x\n",
1570 gk20a_readl(g, gr_pri_be0_zrop_status_r()));
1571 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS2 : 0x%x\n",
1572 gk20a_readl(g, gr_pri_be0_zrop_status2_r()));
1573 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS : 0x%x\n",
1574 gk20a_readl(g, gr_pri_bes_zrop_status_r()));
1575 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n",
1576 gk20a_readl(g, gr_pri_bes_zrop_status2_r()));
1577 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION: 0x%x\n",
1578 gk20a_readl(g, gr_pri_be0_becs_be_exception_r()));
1579 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN: 0x%x\n",
1580 gk20a_readl(g, gr_pri_be0_becs_be_exception_en_r()));
1581 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n",
1582 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r()));
1583 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n",
1584 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r()));
1585 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n",
1586 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
1587 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
1588 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
1589
1590 gr_gv11b_dump_gr_sm_regs(g, o);
1591
1592 return 0;
1593}
1594
1595static bool gr_activity_empty_or_preempted(u32 val)
1596{
1597 while (val) {
1598 u32 v = val & 7;
1599 if (v != gr_activity_4_gpc0_empty_v() &&
1600 v != gr_activity_4_gpc0_preempted_v())
1601 return false;
1602 val >>= 3;
1603 }
1604
1605 return true;
1606}
1607
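/*
 * Illustrative sketch, not part of the driver build: the activity words
 * checked above appear to pack one 3-bit status code per unit, and the helper
 * treats only the "empty" and "preempted" codes as idle. The helper name
 * below (example_count_busy_units) is hypothetical and only mirrors that
 * decode.
 */
#if 0
static u32 example_count_busy_units(u32 val)
{
	u32 busy = 0;

	while (val) {
		u32 v = val & 7;

		if (v != gr_activity_4_gpc0_empty_v() &&
		    v != gr_activity_4_gpc0_preempted_v())
			busy++;
		val >>= 3;
	}
	return busy;
}
#endif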
1608int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms,
1609 u32 expect_delay)
1610{
1611 u32 delay = expect_delay;
1612 bool gr_enabled;
1613 bool ctxsw_active;
1614 bool gr_busy;
1615 u32 gr_status;
1616 u32 activity0, activity1, activity2, activity4;
1617 struct nvgpu_timeout timeout;
1618
1619 gk20a_dbg_fn("");
1620
1621 nvgpu_timeout_init(g, &timeout, duration_ms, NVGPU_TIMER_CPU_TIMER);
1622
1623 do {
1624 /* fmodel: host gets fifo_engine_status(gr) from gr
1625 only when gr_status is read */
1626 gr_status = gk20a_readl(g, gr_status_r());
1627
1628 gr_enabled = gk20a_readl(g, mc_enable_r()) &
1629 mc_enable_pgraph_enabled_f();
1630
1631 ctxsw_active = gr_status & 1<<7;
1632
1633 activity0 = gk20a_readl(g, gr_activity_0_r());
1634 activity1 = gk20a_readl(g, gr_activity_1_r());
1635 activity2 = gk20a_readl(g, gr_activity_2_r());
1636 activity4 = gk20a_readl(g, gr_activity_4_r());
1637
1638 gr_busy = !(gr_activity_empty_or_preempted(activity0) &&
1639 gr_activity_empty_or_preempted(activity1) &&
1640 activity2 == 0 &&
1641 gr_activity_empty_or_preempted(activity4));
1642
1643 if (!gr_enabled || (!gr_busy && !ctxsw_active)) {
1644 gk20a_dbg_fn("done");
1645 return 0;
1646 }
1647
1648 usleep_range(delay, delay * 2);
1649 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
1650
1651 } while (!nvgpu_timeout_expired(&timeout));
1652
1653 nvgpu_err(g,
1654 "timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x",
1655 ctxsw_active, gr_busy, activity0, activity1, activity2, activity4);
1656
1657 return -EAGAIN;
1658}
1659
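/*
 * Illustrative sketch, not part of the driver build: the same bounded
 * poll-with-exponential-backoff pattern used in gr_gv11b_wait_empty(),
 * reduced to its skeleton. "example_poll_with_backoff" and "check_done" are
 * hypothetical names standing in for the gr_status/activity checks above.
 */
#if 0
static int example_poll_with_backoff(struct gk20a *g, unsigned long duration_ms,
				     bool (*check_done)(struct gk20a *g))
{
	u32 delay = GR_IDLE_CHECK_DEFAULT;
	struct nvgpu_timeout timeout;

	nvgpu_timeout_init(g, &timeout, duration_ms, NVGPU_TIMER_CPU_TIMER);

	do {
		if (check_done(g))
			return 0;

		/* back off, but never sleep longer than GR_IDLE_CHECK_MAX */
		nvgpu_usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
	} while (!nvgpu_timeout_expired(&timeout));

	return -EAGAIN;
}
#endif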
1660void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
1661 struct channel_ctx_gk20a *ch_ctx,
1662 u64 addr, bool patch)
1663{
1664 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
1665 int attrBufferSize;
1666
1667 if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va)
1668 attrBufferSize = gr_ctx->t18x.betacb_ctxsw_buffer.size;
1669 else
1670 attrBufferSize = g->ops.gr.calc_global_ctx_buffer_size(g);
1671
1672 attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
1673
1674 gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch);
1675
1676 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
1677 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
1678 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
1679
1680 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
1681 gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch);
1682
1683 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
1684 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) |
1685 gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
1686}
1687
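/*
 * Illustrative sketch, not part of the driver build: the TEX RM CB size is
 * programmed in the granularity reported by the hw header (the 128B-granule
 * divisor used above), so the chosen byte size is converted to granules
 * before the patch write. "example_attrib_cb_granules" is a hypothetical
 * helper name.
 */
#if 0
static u32 example_attrib_cb_granules(u32 size_bytes)
{
	return size_bytes /
		gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
}
#endif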
1688void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
1689{
1690#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
1691 tegra_fuse_writel(0x1, FUSE_FUSEBYPASS_0);
1692 tegra_fuse_writel(0x0, FUSE_WRITE_ACCESS_SW_0);
1693#else
1694 tegra_fuse_control_write(0x1, FUSE_FUSEBYPASS_0);
1695 tegra_fuse_control_write(0x0, FUSE_WRITE_ACCESS_SW_0);
1696#endif
1697
1698 if (g->gr.gpc_tpc_mask[gpc_index] == 0x1)
1699 tegra_fuse_writel(0x2, FUSE_OPT_GPU_TPC0_DISABLE_0);
1700 else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2)
1701 tegra_fuse_writel(0x1, FUSE_OPT_GPU_TPC0_DISABLE_0);
1702 else
1703 tegra_fuse_writel(0x0, FUSE_OPT_GPU_TPC0_DISABLE_0);
1704}
1705
1706void gr_gv11b_get_access_map(struct gk20a *g,
1707 u32 **whitelist, int *num_entries)
1708{
1709 static u32 wl_addr_gv11b[] = {
1710 /* this list must be sorted (low to high) */
1711 0x404468, /* gr_pri_mme_max_instructions */
1712 0x418300, /* gr_pri_gpcs_rasterarb_line_class */
1713 0x418800, /* gr_pri_gpcs_setup_debug */
1714 0x418e00, /* gr_pri_gpcs_swdx_config */
1715 0x418e40, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1716 0x418e44, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1717 0x418e48, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1718 0x418e4c, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1719 0x418e50, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1720 0x418e58, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1721 0x418e5c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1722 0x418e60, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1723 0x418e64, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1724 0x418e68, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1725 0x418e6c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1726 0x418e70, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1727 0x418e74, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1728 0x418e78, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1729 0x418e7c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1730 0x418e80, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1731 0x418e84, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1732 0x418e88, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1733 0x418e8c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1734 0x418e90, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1735 0x418e94, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1736 0x419864, /* gr_pri_gpcs_tpcs_pe_l2_evict_policy */
1737 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */
1738 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */
1739 0x419e84, /* gr_pri_gpcs_tpcs_sms_dbgr_control0 */
1740 0x419ba4, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */
1741 };
1742
1743 *whitelist = wl_addr_gv11b;
1744 *num_entries = ARRAY_SIZE(wl_addr_gv11b);
1745}
1746
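/*
 * Illustrative sketch, not part of the driver build: a debug-time check of
 * the "sorted low to high" invariant that the whitelist above relies on.
 * "example_access_map_is_sorted" is a hypothetical helper name.
 */
#if 0
static bool example_access_map_is_sorted(const u32 *wl, int n)
{
	int i;

	for (i = 1; i < n; i++) {
		if (wl[i] <= wl[i - 1])
			return false;
	}
	return true;
}
#endif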
1747/* @brief Pre-process work on the SM exceptions to determine if we clear them or not.
1748 *
1749 * As on Pascal, if we are in CILP preemption mode, preempt the channel and handle errors with special processing.
1750 */
1751int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
1752 u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr,
1753 bool sm_debugger_attached, struct channel_gk20a *fault_ch,
1754 bool *early_exit, bool *ignore_debugger)
1755{
1756 int ret;
1757 bool cilp_enabled = false;
1758 u32 global_mask = 0, dbgr_control0, global_esr_copy;
1759 u32 offset = gk20a_gr_gpc_offset(g, gpc) +
1760 gk20a_gr_tpc_offset(g, tpc) +
1761 gv11b_gr_sm_offset(g, sm);
1762
1763 *early_exit = false;
1764 *ignore_debugger = false;
1765
1766 if (fault_ch)
1767 cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode ==
1768 NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
1769
1770 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1771 "SM Exception received on gpc %d tpc %d sm %d = 0x%08x",
1772 gpc, tpc, sm, global_esr);
1773
1774 if (cilp_enabled && sm_debugger_attached) {
1775 if (global_esr & gr_gpc0_tpc0_sm0_hww_global_esr_bpt_int_pending_f())
1776 gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset,
1777 gr_gpc0_tpc0_sm0_hww_global_esr_bpt_int_pending_f());
1778
1779 if (global_esr & gr_gpc0_tpc0_sm0_hww_global_esr_single_step_complete_pending_f())
1780 gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset,
1781 gr_gpc0_tpc0_sm0_hww_global_esr_single_step_complete_pending_f());
1782
1783 global_mask = gr_gpc0_tpc0_sm0_hww_global_esr_multiple_warp_errors_pending_f() |
1784 gr_gpc0_tpc0_sm0_hww_global_esr_bpt_pause_pending_f();
1785
1786 if (warp_esr != 0 || (global_esr & global_mask) != 0) {
1787 *ignore_debugger = true;
1788
1789 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1790 "CILP: starting wait for LOCKED_DOWN on "
1791 "gpc %d tpc %d sm %d",
1792 gpc, tpc, sm);
1793
1794 if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch)) {
1795 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1796 "CILP: Broadcasting STOP_TRIGGER from "
1797 "gpc %d tpc %d sm %d",
1798 gpc, tpc, sm);
1799 g->ops.gr.suspend_all_sms(g,
1800 global_mask, false);
1801
1802 gk20a_dbg_gpu_clear_broadcast_stop_trigger(fault_ch);
1803 } else {
1804 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1805 "CILP: STOP_TRIGGER from "
1806 "gpc %d tpc %d sm %d",
1807 gpc, tpc, sm);
1808 g->ops.gr.suspend_single_sm(g,
1809 gpc, tpc, sm, global_mask, true);
1810 }
1811
1812 /* reset the HWW errors after locking down */
1813 global_esr_copy = g->ops.gr.get_sm_hww_global_esr(g,
1814 gpc, tpc, sm);
1815 g->ops.gr.clear_sm_hww(g,
1816 gpc, tpc, sm, global_esr_copy);
1817 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1818 "CILP: HWWs cleared for "
1819 "gpc %d tpc %d sm %d",
1820 gpc, tpc, sm);
1821
1822 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n");
1823 ret = gr_gp10b_set_cilp_preempt_pending(g, fault_ch);
1824 if (ret) {
1825 nvgpu_err(g, "CILP: error while setting CILP preempt pending!");
1826 return ret;
1827 }
1828
1829 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset);
1830 if (dbgr_control0 & gr_gpc0_tpc0_sm0_dbgr_control0_single_step_mode_enable_f()) {
1831 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1832 "CILP: clearing SINGLE_STEP_MODE "
1833 "before resume for gpc %d tpc %d sm %d",
1834 gpc, tpc, sm);
1835 dbgr_control0 = set_field(dbgr_control0,
1836 gr_gpc0_tpc0_sm0_dbgr_control0_single_step_mode_m(),
1837 gr_gpc0_tpc0_sm0_dbgr_control0_single_step_mode_disable_f());
1838 gk20a_writel(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset, dbgr_control0);
1839 }
1840
1841 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1842 "CILP: resume for gpc %d tpc %d sm %d",
1843 gpc, tpc, sm);
1844 g->ops.gr.resume_single_sm(g, gpc, tpc, sm);
1845
1846 *ignore_debugger = true;
1847 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1848 "CILP: All done on gpc %d, tpc %d sm %d",
1849 gpc, tpc, sm);
1850 }
1851
1852 *early_exit = true;
1853 }
1854 return 0;
1855}
1856
1857static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
1858{
1859 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
1860 u32 corrected_delta, uncorrected_delta;
1861 u32 corrected_overflow, uncorrected_overflow;
1862
1863 if (intr & (gr_fecs_host_int_status_ecc_uncorrected_m() |
1864 gr_fecs_host_int_status_ecc_corrected_m())) {
1865 ecc_status = gk20a_readl(g, gr_fecs_falcon_ecc_status_r());
1866 ecc_addr = gk20a_readl(g,
1867 gr_fecs_falcon_ecc_address_r());
1868 corrected_cnt = gk20a_readl(g,
1869 gr_fecs_falcon_ecc_corrected_err_count_r());
1870 uncorrected_cnt = gk20a_readl(g,
1871 gr_fecs_falcon_ecc_uncorrected_err_count_r());
1872
1873 corrected_delta =
1874 gr_fecs_falcon_ecc_corrected_err_count_total_v(
1875 corrected_cnt);
1876 uncorrected_delta =
1877 gr_fecs_falcon_ecc_uncorrected_err_count_total_v(
1878 uncorrected_cnt);
1879
1880 corrected_overflow = ecc_status &
1881 gr_fecs_falcon_ecc_status_corrected_err_total_counter_overflow_m();
1882 uncorrected_overflow = ecc_status &
1883 gr_fecs_falcon_ecc_status_uncorrected_err_total_counter_overflow_m();
1884
1885 /* clear the interrupt */
1886 if ((corrected_delta > 0) || corrected_overflow)
1887 gk20a_writel(g,
1888 gr_fecs_falcon_ecc_corrected_err_count_r(), 0);
1889 if ((uncorrected_delta > 0) || uncorrected_overflow)
1890 gk20a_writel(g,
1891 gr_fecs_falcon_ecc_uncorrected_err_count_r(),
1892 0);
1893
1894
1895 /* clear the interrupt */
1896 gk20a_writel(g, gr_fecs_falcon_ecc_uncorrected_err_count_r(),
1897 0);
1898 gk20a_writel(g, gr_fecs_falcon_ecc_corrected_err_count_r(), 0);
1899
1900 /* clear the interrupt */
1901 gk20a_writel(g, gr_fecs_falcon_ecc_status_r(),
1902 gr_fecs_falcon_ecc_status_reset_task_f());
1903
1904 g->ecc.gr.t19x.fecs_corrected_err_count.counters[0] +=
1905 corrected_delta;
1906 g->ecc.gr.t19x.fecs_uncorrected_err_count.counters[0] +=
1907 uncorrected_delta;
1908
1909 nvgpu_log(g, gpu_dbg_intr,
1910 "fecs ecc interrupt intr: 0x%x", intr);
1911
1912 if (ecc_status &
1913 gr_fecs_falcon_ecc_status_corrected_err_imem_m())
1914 nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected");
1915 if (ecc_status &
1916 gr_fecs_falcon_ecc_status_uncorrected_err_imem_m())
1917 nvgpu_log(g, gpu_dbg_intr,
1918 "imem ecc error uncorrected");
1919 if (ecc_status &
1920 gr_fecs_falcon_ecc_status_corrected_err_dmem_m())
1921 nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected");
1922 if (ecc_status &
1923 gr_fecs_falcon_ecc_status_uncorrected_err_dmem_m())
1924 nvgpu_log(g, gpu_dbg_intr,
1925 "dmem ecc error uncorrected");
1926 if (corrected_overflow || uncorrected_overflow)
1927 nvgpu_info(g, "fecs ecc counter overflow!");
1928
1929 nvgpu_log(g, gpu_dbg_intr,
1930 "ecc error row address: 0x%x",
1931 gr_fecs_falcon_ecc_address_row_address_v(ecc_addr));
1932
1933 nvgpu_log(g, gpu_dbg_intr,
1934 "ecc error count corrected: %d, uncorrected %d",
1935 g->ecc.gr.t19x.fecs_corrected_err_count.counters[0],
1936 g->ecc.gr.t19x.fecs_uncorrected_err_count.counters[0]);
1937 }
1938}
1939
1940int gr_gv11b_handle_fecs_error(struct gk20a *g,
1941 struct channel_gk20a *__ch,
1942 struct gr_gk20a_isr_data *isr_data)
1943{
1944 u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
1945 int ret;
1946
1947 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
1948
1949 ret = gr_gp10b_handle_fecs_error(g, __ch, isr_data);
1950
1951 /* Handle ECC errors */
1952 gr_gv11b_handle_fecs_ecc_error(g, gr_fecs_intr);
1953
1954 return ret;
1955}
1956
1957int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
1958{
1959 u32 map;
1960 u32 i, j, mapregs;
1961 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
1962 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
1963 GPU_LIT_NUM_TPC_PER_GPC);
1964
1965 gk20a_dbg_fn("");
1966
1967 if (!gr->map_tiles)
1968 return -1;
1969
1970 gk20a_writel(g, gr_crstr_map_table_cfg_r(),
1971 gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) |
1972 gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count));
1973
1974 /* 6 TPCs can be stored in one map register */
1975 mapregs = (num_gpcs * num_tpc_per_gpc + 5) / 6;
1976
1977 for (i = 0, j = 0; i < mapregs; i++, j = j + 6) {
1978 map = gr_crstr_gpc_map_tile0_f(gr->map_tiles[j]) |
1979 gr_crstr_gpc_map_tile1_f(gr->map_tiles[j + 1]) |
1980 gr_crstr_gpc_map_tile2_f(gr->map_tiles[j + 2]) |
1981 gr_crstr_gpc_map_tile3_f(gr->map_tiles[j + 3]) |
1982 gr_crstr_gpc_map_tile4_f(gr->map_tiles[j + 4]) |
1983 gr_crstr_gpc_map_tile5_f(gr->map_tiles[j + 5]);
1984
1985 gk20a_writel(g, gr_crstr_gpc_map_r(i), map);
1986 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map_r(i), map);
1987 gk20a_writel(g, gr_rstr2d_gpc_map_r(i), map);
1988 }
1989
1990 gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
1991 gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) |
1992 gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count));
1993
1994 for (i = 0, j = 1; i < gr_ppcs_wwdx_map_table_cfg_coeff__size_1_v();
1995 i++, j = j + 4) {
1996 gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_coeff_r(i),
1997 gr_ppcs_wwdx_map_table_cfg_coeff_0_mod_value_f(
1998 ((1 << j) % gr->tpc_count)) |
1999 gr_ppcs_wwdx_map_table_cfg_coeff_1_mod_value_f(
2000 ((1 << (j + 1)) % gr->tpc_count)) |
2001 gr_ppcs_wwdx_map_table_cfg_coeff_2_mod_value_f(
2002 ((1 << (j + 2)) % gr->tpc_count)) |
2003 gr_ppcs_wwdx_map_table_cfg_coeff_3_mod_value_f(
2004 ((1 << (j + 3)) % gr->tpc_count)));
2005 }
2006
2007 gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
2008 gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) |
2009 gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count));
2010
2011 return 0;
2012}
2013
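/*
 * Illustrative sketch, not part of the driver build: the map-register count
 * computed above is a round-up division, since one map register holds six
 * tile entries. "example_rop_map_regs" is a hypothetical helper name.
 */
#if 0
static u32 example_rop_map_regs(u32 num_gpcs, u32 num_tpc_per_gpc)
{
	u32 tiles = num_gpcs * num_tpc_per_gpc;

	return (tiles + 5) / 6;
}
#endif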
2014static int gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)
2015{
2016 struct av_list_gk20a *sw_veid_bundle_init =
2017 &g->gr.ctx_vars.sw_veid_bundle_init;
2018 u32 j;
2019 u32 num_subctx, err = 0;
2020
2021 num_subctx = g->fifo.t19x.max_subctx_count;
2022
2023 for (j = 0; j < num_subctx; j++) {
2024 nvgpu_log_fn(g, "write bundle_address_r for subctx: %d", j);
2025 gk20a_writel(g, gr_pipe_bundle_address_r(),
2026 sw_veid_bundle_init->l[index].addr |
2027 gr_pipe_bundle_address_veid_f(j));
2028
2029 err = gr_gk20a_wait_fe_idle(g, gk20a_get_gr_idle_timeout(g),
2030 GR_IDLE_CHECK_DEFAULT);
2031 }
2032 return err;
2033}
2034
2035int gr_gv11b_init_sw_veid_bundle(struct gk20a *g)
2036{
2037 struct av_list_gk20a *sw_veid_bundle_init =
2038 &g->gr.ctx_vars.sw_veid_bundle_init;
2039 u32 i;
2040 u32 last_bundle_data = 0;
2041 u32 err = 0;
2042
2043 for (i = 0; i < sw_veid_bundle_init->count; i++) {
2044 nvgpu_log_fn(g, "veid bundle count: %d", i);
2045
2046 if (i == 0 || last_bundle_data !=
2047 sw_veid_bundle_init->l[i].value) {
2048 gk20a_writel(g, gr_pipe_bundle_data_r(),
2049 sw_veid_bundle_init->l[i].value);
2050 last_bundle_data = sw_veid_bundle_init->l[i].value;
2051 nvgpu_log_fn(g, "last_bundle_data : 0x%08x",
2052 last_bundle_data);
2053 }
2054
2055 if (gr_pipe_bundle_address_value_v(
2056 sw_veid_bundle_init->l[i].addr) == GR_GO_IDLE_BUNDLE) {
2057 nvgpu_log_fn(g, "go idle bundle");
2058 gk20a_writel(g, gr_pipe_bundle_address_r(),
2059 sw_veid_bundle_init->l[i].addr);
2060 err |= gr_gk20a_wait_idle(g,
2061 gk20a_get_gr_idle_timeout(g),
2062 GR_IDLE_CHECK_DEFAULT);
2063 } else
2064 err = gv11b_write_bundle_veid_state(g, i);
2065
2066 if (err) {
2067 nvgpu_err(g, "failed to init sw veid bundle");
2068 break;
2069 }
2070 }
2071 return err;
2072}
2073
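/*
 * Illustrative sketch, not part of the driver build: how a bundle address is
 * tagged with a subcontext (VEID) id before being replayed for every subctx,
 * mirroring gv11b_write_bundle_veid_state() above.
 * "example_bundle_addr_for_veid" is a hypothetical helper name.
 */
#if 0
static u32 example_bundle_addr_for_veid(u32 addr, u32 veid)
{
	return addr | gr_pipe_bundle_address_veid_f(veid);
}
#endif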
2074void gr_gv11b_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
2075 u32 *zcull_map_tiles)
2076{
2077 u32 val, i, j;
2078
2079 gk20a_dbg_fn("");
2080
2081 for (i = 0, j = 0; i < (zcull_num_entries / 8); i++, j += 8) {
2082 val =
2083 gr_gpcs_zcull_sm_in_gpc_number_map_tile_0_f(
2084 zcull_map_tiles[j+0]) |
2085 gr_gpcs_zcull_sm_in_gpc_number_map_tile_1_f(
2086 zcull_map_tiles[j+1]) |
2087 gr_gpcs_zcull_sm_in_gpc_number_map_tile_2_f(
2088 zcull_map_tiles[j+2]) |
2089 gr_gpcs_zcull_sm_in_gpc_number_map_tile_3_f(
2090 zcull_map_tiles[j+3]) |
2091 gr_gpcs_zcull_sm_in_gpc_number_map_tile_4_f(
2092 zcull_map_tiles[j+4]) |
2093 gr_gpcs_zcull_sm_in_gpc_number_map_tile_5_f(
2094 zcull_map_tiles[j+5]) |
2095 gr_gpcs_zcull_sm_in_gpc_number_map_tile_6_f(
2096 zcull_map_tiles[j+6]) |
2097 gr_gpcs_zcull_sm_in_gpc_number_map_tile_7_f(
2098 zcull_map_tiles[j+7]);
2099
2100 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map_r(i), val);
2101 }
2102}
2103
2104void gr_gv11b_detect_sm_arch(struct gk20a *g)
2105{
2106 u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
2107
2108 g->params.sm_arch_spa_version =
2109 gr_gpc0_tpc0_sm_arch_spa_version_v(v);
2110 g->params.sm_arch_sm_version =
2111 gr_gpc0_tpc0_sm_arch_sm_version_v(v);
2112 g->params.sm_arch_warp_count =
2113 gr_gpc0_tpc0_sm_arch_warp_count_v(v);
2114}
2115
2116void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
2117 u32 gpc, u32 tpc, u32 smid)
2118{
2119 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
2120 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
2121 GPU_LIT_TPC_IN_GPC_STRIDE);
2122 u32 gpc_offset = gpc_stride * gpc;
2123 u32 tpc_offset = tpc_in_gpc_stride * tpc;
2124 u32 global_tpc_index = g->gr.sm_to_cluster[smid].global_tpc_index;
2125
2126 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
2127 gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index));
2128 gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
2129 gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index));
2130 gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
2131 gr_gpc0_tpc0_pe_cfg_smid_value_f(global_tpc_index));
2132}
2133
2134int gr_gv11b_load_smid_config(struct gk20a *g)
2135{
2136 u32 *tpc_sm_id;
2137 u32 i, j;
2138 u32 tpc_index, gpc_index, tpc_id;
2139 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
2140 int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
2141
2142 tpc_sm_id = nvgpu_kcalloc(g, gr_cwd_sm_id__size_1_v(), sizeof(u32));
2143 if (!tpc_sm_id)
2144 return -ENOMEM;
2145
2146 /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs. */
2147 for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) {
2148 u32 reg = 0;
2149 u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
2150 gr_cwd_gpc_tpc_id_tpc0_s();
2151
2152 for (j = 0; j < 4; j++) {
2153 u32 sm_id;
2154 u32 bits;
2155
2156 tpc_id = (i << 2) + j;
2157 sm_id = tpc_id * sm_per_tpc;
2158
2159 if (sm_id >= g->gr.no_of_sm)
2160 break;
2161
2162 gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
2163 tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
2164
2165 bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
2166 gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
2167 reg |= bits << (j * bit_stride);
2168
2169 tpc_sm_id[gpc_index + (num_gpcs * ((tpc_index & 4)
2170 >> 2))] |= tpc_id << tpc_index * bit_stride;
2171 }
2172 gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
2173 }
2174
2175 for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
2176 gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
2177 nvgpu_kfree(g, tpc_sm_id);
2178
2179 return 0;
2180}
2181
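/*
 * Illustrative sketch, not part of the driver build: packing one (gpc, tpc)
 * pair into its slot of a CWD GPC_TPC_ID register, as done in the inner loop
 * above; each of the four slots is bit_stride bits wide.
 * "example_pack_gpc_tpc_slot" is a hypothetical helper name.
 */
#if 0
static u32 example_pack_gpc_tpc_slot(u32 gpc_index, u32 tpc_index, u32 slot)
{
	u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
			 gr_cwd_gpc_tpc_id_tpc0_s();
	u32 bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
		   gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);

	return bits << (slot * bit_stride);
}
#endif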
2182int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
2183{
2184 u32 addr_lo;
2185 u32 addr_hi;
2186 struct ctx_header_desc *ctx;
2187 int err;
2188
2189 gk20a_dbg_fn("");
2190
2191 err = gv11b_alloc_subctx_header(c);
2192 if (err)
2193 return err;
2194
2195 err = gv11b_update_subctx_header(c, gpu_va);
2196 if (err)
2197 return err;
2198
2199 ctx = &c->ch_ctx.ctx_header;
2200 addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v();
2201 addr_hi = u64_hi32(ctx->mem.gpu_va);
2202
2203 /* point this address to engine_wfi_ptr */
2204 nvgpu_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(),
2205 ram_in_engine_cs_wfi_v() |
2206 ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) |
2207 ram_in_engine_wfi_ptr_lo_f(addr_lo));
2208
2209 nvgpu_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(),
2210 ram_in_engine_wfi_ptr_hi_f(addr_hi));
2211
2212 return 0;
2213}
2214
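/*
 * Illustrative sketch, not part of the driver build: how the subctx header
 * GPU VA is split for the engine_wfi_ptr words written above -- the low word
 * is pre-shifted by the instance-block base shift, the high word is simply
 * the upper 32 bits. "example_split_wfi_ptr" is a hypothetical helper name.
 */
#if 0
static void example_split_wfi_ptr(u64 gpu_va, u32 *lo, u32 *hi)
{
	*lo = u64_lo32(gpu_va) >> ram_in_base_shift_v();
	*hi = u64_hi32(gpu_va);
}
#endif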
2215
2216
2217int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
2218{
2219 struct channel_ctx_gk20a *ch_ctx = NULL;
2220 u32 pd_ab_dist_cfg0;
2221 u32 ds_debug;
2222 u32 mpc_vtg_debug;
2223 u32 pe_vaf;
2224 u32 pe_vsc_vpc;
2225
2226 gk20a_dbg_fn("");
2227
2228 pd_ab_dist_cfg0 = gk20a_readl(g, gr_pd_ab_dist_cfg0_r());
2229 ds_debug = gk20a_readl(g, gr_ds_debug_r());
2230 mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());
2231
2232 pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
2233 pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());
2234
2235 pe_vaf = gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f() | pe_vaf;
2236 pe_vsc_vpc = gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f() |
2237 pe_vsc_vpc;
2238 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_en_f() |
2239 pd_ab_dist_cfg0;
2240 ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
2241 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() |
2242 mpc_vtg_debug;
2243
2244 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf,
2245 false);
2246 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(),
2247 pe_vsc_vpc, false);
2248 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(),
2249 pd_ab_dist_cfg0, false);
2250 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false);
2251 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(),
2252 mpc_vtg_debug, false);
2253
2254 return 0;
2255}
2256
2257void gr_gv11b_write_zcull_ptr(struct gk20a *g,
2258 struct nvgpu_mem *mem, u64 gpu_va)
2259{
2260 u32 va_lo, va_hi;
2261
2262 gpu_va = gpu_va >> 8;
2263 va_lo = u64_lo32(gpu_va);
2264 va_hi = u64_hi32(gpu_va);
2265 nvgpu_mem_wr(g, mem,
2266 ctxsw_prog_main_image_zcull_ptr_o(), va_lo);
2267 nvgpu_mem_wr(g, mem,
2268 ctxsw_prog_main_image_zcull_ptr_hi_o(), va_hi);
2269}
2270
2271
2272void gr_gv11b_write_pm_ptr(struct gk20a *g,
2273 struct nvgpu_mem *mem, u64 gpu_va)
2274{
2275 u32 va_lo, va_hi;
2276
2277 gpu_va = gpu_va >> 8;
2278 va_lo = u64_lo32(gpu_va);
2279 va_hi = u64_hi32(gpu_va);
2280 nvgpu_mem_wr(g, mem,
2281 ctxsw_prog_main_image_pm_ptr_o(), va_lo);
2282 nvgpu_mem_wr(g, mem,
2283 ctxsw_prog_main_image_pm_ptr_hi_o(), va_hi);
2284}
2285
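/*
 * Illustrative sketch, not part of the driver build: both pointer writers
 * above store the VA in 256-byte units (shifted right by 8) and then split it
 * into two 32-bit context image words. "example_split_ctxsw_ptr" is a
 * hypothetical helper name.
 */
#if 0
static void example_split_ctxsw_ptr(u64 gpu_va, u32 *lo, u32 *hi)
{
	gpu_va >>= 8;
	*lo = u64_lo32(gpu_va);
	*hi = u64_hi32(gpu_va);
}
#endif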
2286void gr_gv11b_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine)
2287{
2288 u32 gate_ctrl;
2289
2290 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_ELCG))
2291 return;
2292
2293 gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine));
2294
2295 switch (mode) {
2296 case ELCG_RUN:
2297 gate_ctrl = set_field(gate_ctrl,
2298 therm_gate_ctrl_eng_clk_m(),
2299 therm_gate_ctrl_eng_clk_run_f());
2300 gate_ctrl = set_field(gate_ctrl,
2301 therm_gate_ctrl_idle_holdoff_m(),
2302 therm_gate_ctrl_idle_holdoff_on_f());
2303 break;
2304 case ELCG_STOP:
2305 gate_ctrl = set_field(gate_ctrl,
2306 therm_gate_ctrl_eng_clk_m(),
2307 therm_gate_ctrl_eng_clk_stop_f());
2308 break;
2309 case ELCG_AUTO:
2310 gate_ctrl = set_field(gate_ctrl,
2311 therm_gate_ctrl_eng_clk_m(),
2312 therm_gate_ctrl_eng_clk_auto_f());
2313 break;
2314 default:
2315 nvgpu_err(g, "invalid elcg mode %d", mode);
2316 }
2317
2318 gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl);
2319}
2320
2321void gr_gv11b_load_tpc_mask(struct gk20a *g)
2322{
2323 u32 pes_tpc_mask = 0, fuse_tpc_mask;
2324 u32 gpc, pes, val;
2325 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
2326 GPU_LIT_NUM_TPC_PER_GPC);
2327
2328 /* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
2329 for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
2330 for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) {
2331 pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] <<
2332 num_tpc_per_gpc * gpc;
2333 }
2334 }
2335
2336 gk20a_dbg_info("pes_tpc_mask %u\n", pes_tpc_mask);
2337 fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, gpc);
2338 if (g->tpc_fs_mask_user &&
2339 g->tpc_fs_mask_user != fuse_tpc_mask &&
2340 fuse_tpc_mask == (0x1U << g->gr.max_tpc_count) - 1U) {
2341 val = g->tpc_fs_mask_user;
2342 val &= (0x1U << g->gr.max_tpc_count) - 1U;
2343 val = (0x1U << hweight32(val)) - 1U;
2344 gk20a_writel(g, gr_fe_tpc_fs_r(0), val);
2345 } else {
2346 gk20a_writel(g, gr_fe_tpc_fs_r(0), pes_tpc_mask);
2347 }
2348
2349}
2350
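/*
 * Illustrative sketch, not part of the driver build: merging the per-PES TPC
 * masks into the single floorsweep word written above; each GPC occupies
 * num_tpc_per_gpc bits. "example_merge_tpc_mask" is a hypothetical helper
 * name.
 */
#if 0
static u32 example_merge_tpc_mask(struct gr_gk20a *gr, u32 num_tpc_per_gpc)
{
	u32 mask = 0;
	u32 gpc, pes;

	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
		for (pes = 0; pes < gr->pe_count_per_gpc; pes++) {
			mask |= gr->pes_tpc_mask[pes][gpc] <<
					(num_tpc_per_gpc * gpc);
		}
	}
	return mask;
}
#endif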
2351void gr_gv11b_set_preemption_buffer_va(struct gk20a *g,
2352 struct nvgpu_mem *mem, u64 gpu_va)
2353{
2354 u32 addr_lo, addr_hi;
2355
2356 addr_lo = u64_lo32(gpu_va);
2357 addr_hi = u64_hi32(gpu_va);
2358
2359 nvgpu_mem_wr(g, mem,
2360 ctxsw_prog_main_image_full_preemption_ptr_o(), addr_lo);
2361 nvgpu_mem_wr(g, mem,
2362 ctxsw_prog_main_image_full_preemption_ptr_hi_o(), addr_hi);
2363
2364 nvgpu_mem_wr(g, mem,
2365 ctxsw_prog_main_image_full_preemption_ptr_veid0_o(), addr_lo);
2366 nvgpu_mem_wr(g, mem,
2367 ctxsw_prog_main_image_full_preemption_ptr_veid0_hi_o(),
2368 addr_hi);
2369
2370}
2371
2372int gr_gv11b_init_fs_state(struct gk20a *g)
2373{
2374 u32 data;
2375
2376 gk20a_dbg_fn("");
2377
2378 data = gk20a_readl(g, gr_gpcs_tpcs_sm_texio_control_r());
2379 data = set_field(data, gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(),
2380 gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f());
2381 gk20a_writel(g, gr_gpcs_tpcs_sm_texio_control_r(), data);
2382
2383 data = gk20a_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r());
2384 data = set_field(data, gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_m(),
2385 gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f());
2386 gk20a_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data);
2387
2388 if (g->gr.t18x.fecs_feature_override_ecc_val != 0) {
2389 gk20a_writel(g,
2390 gr_fecs_feature_override_ecc_r(),
2391 g->gr.t18x.fecs_feature_override_ecc_val);
2392 }
2393
2394 return gr_gm20b_init_fs_state(g);
2395}
2396
2397void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
2398 u32 *esr_sm_sel)
2399{
2400 u32 reg_val;
2401 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
2402
2403 reg_val = gk20a_readl(g, gr_gpc0_tpc0_sm_tpc_esr_sm_sel_r() + offset);
2404 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
2405 "sm tpc esr sm sel reg val: 0x%x", reg_val);
2406 *esr_sm_sel = 0;
2407 if (gr_gpc0_tpc0_sm_tpc_esr_sm_sel_sm0_error_v(reg_val))
2408 *esr_sm_sel = 1;
2409 if (gr_gpc0_tpc0_sm_tpc_esr_sm_sel_sm1_error_v(reg_val))
2410 *esr_sm_sel |= 1 << 1;
2411 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
2412 "esr_sm_sel bitmask: 0x%x", *esr_sm_sel);
2413}
2414
2415int gv11b_gr_sm_trigger_suspend(struct gk20a *g)
2416{
2417 u32 dbgr_control0;
2418
2419 /* assert stop trigger. uniformity assumption: all SMs will have
2420 * the same state in dbg_control0.
2421 */
2422 dbgr_control0 =
2423 gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r());
2424 dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f();
2425
2426 /* broadcast write */
2427 gk20a_writel(g,
2428 gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0);
2429
2430 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
2431 "stop trigger enable: broadcast dbgr_control0: 0x%x ",
2432 dbgr_control0);
2433
2434 return 0;
2435}
2436
2437void gv11b_gr_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state)
2438{
2439 /* Check if we have at least one valid warp and
2440 * collect each SM's paused/trapped state (same scheme as on Maxwell).
2441 */
2442 struct gr_gk20a *gr = &g->gr;
2443 u32 gpc, tpc, sm, sm_id;
2444 u32 offset;
2445 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
2446
2447 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
2448 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
2449 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
2450 sm = g->gr.sm_to_cluster[sm_id].sm_index;
2451
2452 offset = gk20a_gr_gpc_offset(g, gpc) +
2453 gk20a_gr_tpc_offset(g, tpc) +
2454 gv11b_gr_sm_offset(g, sm);
2455
2456 /* 64 bit read */
2457 warps_valid = (u64)gk20a_readl(g,
2458 gr_gpc0_tpc0_sm0_warp_valid_mask_1_r() +
2459 offset) << 32;
2460 warps_valid |= gk20a_readl(g,
2461 gr_gpc0_tpc0_sm0_warp_valid_mask_0_r() +
2462 offset);
2463
2464 /* 64 bit read */
2465 warps_paused = (u64)gk20a_readl(g,
2466 gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_1_r() +
2467 offset) << 32;
2468 warps_paused |= gk20a_readl(g,
2469 gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_0_r() +
2470 offset);
2471
2472 /* 64 bit read */
2473 warps_trapped = (u64)gk20a_readl(g,
2474 gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_1_r() +
2475 offset) << 32;
2476 warps_trapped |= gk20a_readl(g,
2477 gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_0_r() +
2478 offset);
2479
2480 w_state[sm_id].valid_warps[0] = warps_valid;
2481 w_state[sm_id].trapped_warps[0] = warps_trapped;
2482 w_state[sm_id].paused_warps[0] = warps_paused;
2483 }
2484
2485
2486 /* Only for debug purposes */
2487 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
2488 gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n",
2489 sm_id, w_state[sm_id].valid_warps[0]);
2490 gk20a_dbg_fn("w_state[%d].valid_warps[1]: %llx\n",
2491 sm_id, w_state[sm_id].valid_warps[1]);
2492
2493 gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n",
2494 sm_id, w_state[sm_id].trapped_warps[0]);
2495 gk20a_dbg_fn("w_state[%d].trapped_warps[1]: %llx\n",
2496 sm_id, w_state[sm_id].trapped_warps[1]);
2497
2498 gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n",
2499 sm_id, w_state[sm_id].paused_warps[0]);
2500 gk20a_dbg_fn("w_state[%d].paused_warps[1]: %llx\n",
2501 sm_id, w_state[sm_id].paused_warps[1]);
2502 }
2503}
2504
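/*
 * Illustrative sketch, not part of the driver build: the 64-bit warp masks
 * read above are assembled from a pair of 32-bit registers, high word first.
 * "example_read_warp_mask64" is a hypothetical helper name; the offsets are
 * the same _mask_1_r()/_mask_0_r() pairs used above.
 */
#if 0
static u64 example_read_warp_mask64(struct gk20a *g, u32 hi_reg, u32 lo_reg,
				    u32 offset)
{
	u64 mask = (u64)gk20a_readl(g, hi_reg + offset) << 32;

	mask |= gk20a_readl(g, lo_reg + offset);
	return mask;
}
#endif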
2505int gv11b_gr_update_sm_error_state(struct gk20a *g,
2506 struct channel_gk20a *ch, u32 sm_id,
2507 struct nvgpu_gr_sm_error_state *sm_error_state)
2508{
2509 u32 gpc, tpc, sm, offset;
2510 struct gr_gk20a *gr = &g->gr;
2511 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
2512 int err = 0;
2513
2514 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
2515
2516 gr->sm_error_states[sm_id].hww_global_esr =
2517 sm_error_state->hww_global_esr;
2518 gr->sm_error_states[sm_id].hww_warp_esr =
2519 sm_error_state->hww_warp_esr;
2520 gr->sm_error_states[sm_id].hww_warp_esr_pc =
2521 sm_error_state->hww_warp_esr_pc;
2522 gr->sm_error_states[sm_id].hww_global_esr_report_mask =
2523 sm_error_state->hww_global_esr_report_mask;
2524 gr->sm_error_states[sm_id].hww_warp_esr_report_mask =
2525 sm_error_state->hww_warp_esr_report_mask;
2526
2527 err = gr_gk20a_disable_ctxsw(g);
2528 if (err) {
2529 nvgpu_err(g, "unable to stop gr ctxsw");
2530 goto fail;
2531 }
2532
2533 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
2534 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
2535 sm = g->gr.sm_to_cluster[sm_id].sm_index;
2536
2537 offset = gk20a_gr_gpc_offset(g, gpc) +
2538 gk20a_gr_tpc_offset(g, tpc) +
2539 gv11b_gr_sm_offset(g, sm);
2540
2541 if (gk20a_is_channel_ctx_resident(ch)) {
2542 gk20a_writel(g,
2543 gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset,
2544 gr->sm_error_states[sm_id].hww_global_esr);
2545 gk20a_writel(g,
2546 gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset,
2547 gr->sm_error_states[sm_id].hww_warp_esr);
2548 gk20a_writel(g,
2549 gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r() + offset,
2550 gr->sm_error_states[sm_id].hww_warp_esr_pc);
2551 gk20a_writel(g,
2552 gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r() + offset,
2553 gr->sm_error_states[sm_id].hww_global_esr_report_mask);
2554 gk20a_writel(g,
2555 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset,
2556 gr->sm_error_states[sm_id].hww_warp_esr_report_mask);
2557 } else {
2558 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
2559 if (err)
2560 goto enable_ctxsw;
2561
2562 gr_gk20a_ctx_patch_write(g, ch_ctx,
2563 gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r() +
2564 offset,
2565 gr->sm_error_states[sm_id].hww_global_esr_report_mask,
2566 true);
2567 gr_gk20a_ctx_patch_write(g, ch_ctx,
2568 gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r() +
2569 offset,
2570 gr->sm_error_states[sm_id].hww_warp_esr_report_mask,
2571 true);
2572
2573 gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
2574 }
2575
2576enable_ctxsw:
2577 err = gr_gk20a_enable_ctxsw(g);
2578
2579fail:
2580 nvgpu_mutex_release(&g->dbg_sessions_lock);
2581 return err;
2582}
2583
2584int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
2585 struct channel_gk20a *ch, u64 sms, bool enable)
2586{
2587 struct nvgpu_dbg_gpu_reg_op *ops;
2588 unsigned int i = 0, sm_id;
2589 int err;
2590
2591 ops = nvgpu_kcalloc(g, g->gr.no_of_sm, sizeof(*ops));
2592 if (!ops)
2593 return -ENOMEM;
2594 for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
2595 u32 gpc, tpc, sm;
2596 u32 reg_offset, reg_mask, reg_val;
2597
2598 if (!(sms & (1ULL << sm_id)))
2599 continue;
2600
2601 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
2602 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
2603 sm = g->gr.sm_to_cluster[sm_id].sm_index;
2604
2605 reg_offset = gk20a_gr_gpc_offset(g, gpc) +
2606 gk20a_gr_tpc_offset(g, tpc) +
2607 gv11b_gr_sm_offset(g, sm);
2608
2609 ops[i].op = REGOP(WRITE_32);
2610 ops[i].type = REGOP(TYPE_GR_CTX);
2611 ops[i].offset = gr_gpc0_tpc0_sm0_dbgr_control0_r() + reg_offset;
2612
2613 reg_mask = 0;
2614 reg_val = 0;
2615 if (enable) {
2616 nvgpu_log(g, gpu_dbg_gpu_dbg,
2617 "SM:%d debugger mode ON", sm);
2618 reg_mask |=
2619 gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_m();
2620 reg_val |=
2621 gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_on_f();
2622 } else {
2623 nvgpu_log(g, gpu_dbg_gpu_dbg,
2624 "SM:%d debugger mode OFF", sm);
2625 reg_mask |=
2626 gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_m();
2627 reg_val |=
2628 gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_off_f();
2629 }
2630
2631 ops[i].and_n_mask_lo = reg_mask;
2632 ops[i].value_lo = reg_val;
2633 i++;
2634 }
2635
2636 err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0);
2637 if (err)
2638 nvgpu_err(g, "Failed to access register\n");
2639 nvgpu_kfree(g, ops);
2640 return err;
2641}
2642
2643int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
2644{
2645 int sm_id;
2646 struct gr_gk20a *gr = &g->gr;
2647 u32 offset, sm, sm_per_tpc;
2648 u32 gpc_tpc_offset;
2649
2650 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
2651
2652 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
2653 gpc_tpc_offset = gk20a_gr_gpc_offset(g, gpc) +
2654 gk20a_gr_tpc_offset(g, tpc);
2655
2656 sm_id = gr_gpc0_tpc0_sm_cfg_tpc_id_v(gk20a_readl(g,
2657 gr_gpc0_tpc0_sm_cfg_r() + gpc_tpc_offset));
2658
2659 sm = sm_id % sm_per_tpc;
2660
2661 offset = gpc_tpc_offset + gv11b_gr_sm_offset(g, sm);
2662
2663 gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
2664 gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset);
2665
2666 gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
2667 gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset);
2668
2669 gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g,
2670 gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r() + offset);
2671
2672 gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
2673 gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r() + offset);
2674
2675 gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
2676 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset);
2677
2678 nvgpu_mutex_release(&g->dbg_sessions_lock);
2679
2680 return 0;
2681}
2682
2683void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g)
2684{
2685
2686 /* clear hww */
2687 gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_r(), 0xffffffff);
2688 gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_r(), 0xffffffff);
2689
2690 /* setup sm warp esr report masks */
2691 gk20a_writel(g, gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r(),
2692 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_stack_error_report_f() |
2693 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_api_stack_error_report_f() |
2694 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_pc_wrap_report_f() |
2695 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_pc_report_f() |
2696 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_pc_overflow_report_f() |
2697 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_reg_report_f() |
2698 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
2699 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
2700 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_reg_report_f() |
2701 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_addr_report_f() |
2702 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_addr_report_f() |
2703 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
2704 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
2705 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_stack_overflow_report_f() |
2706 gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_fault_report_f());
2707
2708 /* setup sm global esr report mask. vat_alarm_report is not enabled */
2709 gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r(),
2710 gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_multiple_warp_errors_report_f());
2711}
2712
2713bool gv11b_gr_sm_debugger_attached(struct gk20a *g)
2714{
2715 u32 debugger_mode;
2716 u32 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r());
2717
2718 /* check if sm debugger is attached.
2719 * assumption: all SMs will have debug mode enabled/disabled
2720 * uniformly.
2721 */
2722 debugger_mode =
2723 gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_v(dbgr_control0);
2724 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
2725 "SM Debugger Mode: %d", debugger_mode);
2726 if (debugger_mode ==
2727 gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_on_v())
2728 return true;
2729
2730 return false;
2731}
2732
2733void gv11b_gr_suspend_single_sm(struct gk20a *g,
2734 u32 gpc, u32 tpc, u32 sm,
2735 u32 global_esr_mask, bool check_errors)
2736{
2737 int err;
2738 u32 dbgr_control0;
2739 u32 offset = gk20a_gr_gpc_offset(g, gpc) +
2740 gk20a_gr_tpc_offset(g, tpc) +
2741 gv11b_gr_sm_offset(g, sm);
2742
2743 /* if an SM debugger isn't attached, skip suspend */
2744 if (!g->ops.gr.sm_debugger_attached(g)) {
2745 nvgpu_err(g,
2746 "SM debugger not attached, skipping suspend!");
2747 return;
2748 }
2749
2750 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
2751 "suspending gpc:%d, tpc:%d, sm%d", gpc, tpc, sm);
2752
2753 /* assert stop trigger. */
2754 dbgr_control0 = gk20a_readl(g,
2755 gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset);
2756 dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f();
2757 gk20a_writel(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset,
2758 dbgr_control0);
2759
2760 err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm,
2761 global_esr_mask, check_errors);
2762 if (err) {
2763 nvgpu_err(g,
2764 "SuspendSm failed");
2765 return;
2766 }
2767}
2768
2769void gv11b_gr_suspend_all_sms(struct gk20a *g,
2770 u32 global_esr_mask, bool check_errors)
2771{
2772 struct gr_gk20a *gr = &g->gr;
2773 u32 gpc, tpc, sm;
2774 int err;
2775 u32 dbgr_control0;
2776 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
2777
2778 /* if an SM debugger isn't attached, skip suspend */
2779 if (!g->ops.gr.sm_debugger_attached(g)) {
2780 nvgpu_err(g,
2781 "SM debugger not attached, skipping suspend!");
2782 return;
2783 }
2784
2785 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "suspending all sms");
2786
2787 /* assert stop trigger. uniformity assumption: all SMs will have
2788 * the same state in dbg_control0.
2789 */
2790 dbgr_control0 =
2791 gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r());
2792 dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f();
2793
2794 /* broadcast write */
2795 gk20a_writel(g,
2796 gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0);
2797
2798 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
2799 for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) {
2800 for (sm = 0; sm < sm_per_tpc; sm++) {
2801 err = g->ops.gr.wait_for_sm_lock_down(g,
2802 gpc, tpc, sm,
2803 global_esr_mask, check_errors);
2804 if (err) {
2805 nvgpu_err(g,
2806 "SuspendAllSms failed");
2807 return;
2808 }
2809 }
2810 }
2811 }
2812}
2813
2814void gv11b_gr_resume_single_sm(struct gk20a *g,
2815 u32 gpc, u32 tpc, u32 sm)
2816{
2817 u32 dbgr_control0, dbgr_status0;
2818 u32 offset;
2819 /*
2820 * The following requires some clarification. Despite the fact that both
2821 * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their
2822 * names, only one is actually a trigger, and that is the STOP_TRIGGER.
2823 * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to
2824 * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0
2825 * (_DISABLE) as well.
2826 *
2827 * Advice from the arch group: Disable the stop trigger first, as a
2828 * separate operation, in order to ensure that the trigger has taken
2829 * effect, before enabling the run trigger.
2830 */
2831
2832 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) +
2833 gv11b_gr_sm_offset(g, sm);
2834
2835 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
2836 "resuming gpc:%d, tpc:%d, sm%d", gpc, tpc, sm);
2837 dbgr_control0 = gk20a_readl(g,
2838 gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset);
2839 dbgr_status0 = gk20a_readl(g,
2840 gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset);
2841
2842 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
2843 "before stop trigger disable: "
2844 "dbgr_control0 = 0x%x dbgr_status0: 0x%x",
2845 dbgr_control0, dbgr_status0);
2846
2847 /* De-assert stop trigger */
2848 dbgr_control0 = set_field(dbgr_control0,
2849 gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_m(),
2850 gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_disable_f());
2851 gk20a_writel(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() +
2852 offset, dbgr_control0);
2853
2854 dbgr_control0 = gk20a_readl(g,
2855 gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset);
2856 dbgr_status0 = gk20a_readl(g,
2857 gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset);
2858
2859 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
2860 "before run trigger: "
2861 "dbgr_control0 = 0x%x dbgr_status0: 0x%x",
2862 dbgr_control0, dbgr_status0);
2863 /* Run trigger */
2864 dbgr_control0 |=
2865 gr_gpc0_tpc0_sm0_dbgr_control0_run_trigger_task_f();
2866 gk20a_writel(g,
2867 gr_gpc0_tpc0_sm0_dbgr_control0_r() +
2868 offset, dbgr_control0);
2869
2870 dbgr_control0 = gk20a_readl(g,
2871 gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset);
2872 dbgr_status0 = gk20a_readl(g,
2873 gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset);
2874 /* run trigger is not a sticky bit; the SM clears it immediately */
2875 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
2876 "after run trigger: "
2877 "dbgr_control0 = 0x%x dbgr_status0: 0x%x",
2878 dbgr_control0, dbgr_status0);
2879
2880}
2881
2882void gv11b_gr_resume_all_sms(struct gk20a *g)
2883{
2884 u32 dbgr_control0, dbgr_status0;
2885 /*
2886 * The following requires some clarification. Despite the fact that both
2887 * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their
2888 * names, only one is actually a trigger, and that is the STOP_TRIGGER.
2889 * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to
2890 * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0
2891 * (_DISABLE) as well.
2892 *
2893 * Advice from the arch group: Disable the stop trigger first, as a
2894 * separate operation, in order to ensure that the trigger has taken
2895 * effect, before enabling the run trigger.
2896 */
2897
2898 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "resuming all sms");
2899
2900 /* Read from unicast registers */
2901 dbgr_control0 =
2902 gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r());
2903 dbgr_status0 =
2904 gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_status0_r());
2905
2906 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
2907 "before stop trigger disable: "
2908 "dbgr_control0 = 0x%x dbgr_status0: 0x%x",
2909 dbgr_control0, dbgr_status0);
2910
2911 dbgr_control0 = set_field(dbgr_control0,
2912 gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_m(),
2913 gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_disable_f());
2914 /* Write to broadcast registers */
2915 gk20a_writel(g,
2916 gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0);
2917
2918 /* Read from unicast registers */
2919 dbgr_control0 =
2920 gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r());
2921 dbgr_status0 =
2922 gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_status0_r());
2923
2924 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
2925 "before run trigger: "
2926 "dbgr_control0 = 0x%x dbgr_status0: 0x%x",
2927 dbgr_control0, dbgr_status0);
2928 /* Run trigger */
2929 dbgr_control0 |=
2930 gr_gpc0_tpc0_sm0_dbgr_control0_run_trigger_task_f();
2931 /* Write to broadcast registers */
2932 gk20a_writel(g,
2933 gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0);
2934
2935 /* Read from unicast registers */
2936 dbgr_control0 =
2937 gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r());
2938 dbgr_status0 =
2939 gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_status0_r());
2940 /* run trigger is not a sticky bit; the SM clears it immediately */
2941 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
2942 "after run trigger: "
2943 "dbgr_control0 = 0x%x dbgr_status0: 0x%x",
2944 dbgr_control0, dbgr_status0);
2945}
2946
2947int gv11b_gr_resume_from_pause(struct gk20a *g)
2948{
2949 int err = 0;
2950 u32 reg_val;
2951
2952 /* Clear the pause mask to tell the GPU we want to resume everyone */
2953 gk20a_writel(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_0_r(), 0);
2954
2955 /* explicitly re-enable forwarding of SM interrupts upon any resume */
2956 reg_val = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r());
2957 reg_val |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f();
2958
2959 gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), reg_val);
2960
2961 g->ops.gr.resume_all_sms(g);
2962
2963 return err;
2964}
2965
2966u32 gv11b_gr_get_sm_hww_warp_esr(struct gk20a *g,
2967 u32 gpc, u32 tpc, u32 sm)
2968{
2969 u32 offset = gk20a_gr_gpc_offset(g, gpc) +
2970 gk20a_gr_tpc_offset(g, tpc) +
2971 gv11b_gr_sm_offset(g, sm);
2972
2973 u32 hww_warp_esr = gk20a_readl(g,
2974 gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset);
2975 return hww_warp_esr;
2976}
2977
2978u32 gv11b_gr_get_sm_hww_global_esr(struct gk20a *g,
2979 u32 gpc, u32 tpc, u32 sm)
2980{
2981 u32 offset = gk20a_gr_gpc_offset(g, gpc) +
2982 gk20a_gr_tpc_offset(g, tpc) +
2983 gv11b_gr_sm_offset(g, sm);
2984
2985 u32 hww_global_esr = gk20a_readl(g,
2986 gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset);
2987
2988 return hww_global_esr;
2989}
2990
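/*
 * Illustrative sketch, not part of the driver build: the per-SM register
 * offset used throughout this file is the sum of the GPC, TPC and SM strides.
 * "example_sm_reg_offset" is a hypothetical helper name.
 */
#if 0
static u32 example_sm_reg_offset(struct gk20a *g, u32 gpc, u32 tpc, u32 sm)
{
	return gk20a_gr_gpc_offset(g, gpc) +
		gk20a_gr_tpc_offset(g, tpc) +
		gv11b_gr_sm_offset(g, sm);
}
#endif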
2991u32 gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g)
2992{
2993 /*
2994 * These three interrupts don't require locking down the SM. They can
2995 * be handled by usermode clients as they aren't fatal. Additionally,
2996 * usermode clients may wish to allow some warps to execute while others
2997 * are at breakpoints, as opposed to fatal errors where all warps should
2998 * halt.
2999 */
3000 u32 global_esr_mask =
3001 gr_gpc0_tpc0_sm0_hww_global_esr_bpt_int_pending_f() |
3002 gr_gpc0_tpc0_sm0_hww_global_esr_bpt_pause_pending_f() |
3003 gr_gpc0_tpc0_sm0_hww_global_esr_single_step_complete_pending_f();
3004
3005 return global_esr_mask;
3006}
3007
3008static void gv11b_gr_sm_dump_warp_bpt_pause_trap_mask_regs(struct gk20a *g,
3009 u32 offset, bool timeout)
3010{
3011 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
3012 u32 dbgr_control0 = gk20a_readl(g,
3013 gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset);
3014 u32 dbgr_status0 = gk20a_readl(g,
3015 gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset);
3016 /* 64 bit read */
3017 warps_valid =
3018 (u64)gk20a_readl(g, gr_gpc0_tpc0_sm0_warp_valid_mask_1_r() +
3019 offset) << 32;
3020 warps_valid |= gk20a_readl(g,
3021 gr_gpc0_tpc0_sm0_warp_valid_mask_0_r() + offset);
3022
3023 /* 64 bit read */
3024 warps_paused =
3025 (u64)gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_1_r() +
3026 offset) << 32;
3027 warps_paused |= gk20a_readl(g,
3028 gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_0_r() + offset);
3029
3030 /* 64 bit read */
3031 warps_trapped =
3032 (u64)gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_1_r() +
3033 offset) << 32;
3034 warps_trapped |= gk20a_readl(g,
3035 gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_0_r() + offset);
3036 if (timeout)
3037 nvgpu_err(g,
3038 "STATUS0=0x%x CONTROL0=0x%x VALID_MASK=0x%llx "
3039 "PAUSE_MASK=0x%llx TRAP_MASK=0x%llx\n",
3040 dbgr_status0, dbgr_control0, warps_valid,
3041 warps_paused, warps_trapped);
3042 else
3043 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
3044 "STATUS0=0x%x CONTROL0=0x%x VALID_MASK=0x%llx "
3045 "PAUSE_MASK=0x%llx TRAP_MASK=0x%llx\n",
3046 dbgr_status0, dbgr_control0, warps_valid,
3047 warps_paused, warps_trapped);
3048}
3049
3050int gv11b_gr_wait_for_sm_lock_down(struct gk20a *g,
3051 u32 gpc, u32 tpc, u32 sm,
3052 u32 global_esr_mask, bool check_errors)
3053{
3054 bool locked_down;
3055 bool no_error_pending;
3056 u32 delay = GR_IDLE_CHECK_DEFAULT;
3057 bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g);
3058 u32 dbgr_status0 = 0;
3059 u32 warp_esr, global_esr;
3060 struct nvgpu_timeout timeout;
3061 u32 offset = gk20a_gr_gpc_offset(g, gpc) +
3062 gk20a_gr_tpc_offset(g, tpc) +
3063 gv11b_gr_sm_offset(g, sm);
3064
3065 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
3066 "GPC%d TPC%d: locking down SM%d", gpc, tpc, sm);
3067
3068 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
3069 NVGPU_TIMER_CPU_TIMER);
3070
3071 /* wait for the sm to lock down */
3072 do {
3073 global_esr = g->ops.gr.get_sm_hww_global_esr(g, gpc, tpc, sm);
3074 dbgr_status0 = gk20a_readl(g,
3075 gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset);
3076
3077 warp_esr = g->ops.gr.get_sm_hww_warp_esr(g, gpc, tpc, sm);
3078
3079 locked_down =
3080 (gr_gpc0_tpc0_sm0_dbgr_status0_locked_down_v(dbgr_status0) ==
3081 gr_gpc0_tpc0_sm0_dbgr_status0_locked_down_true_v());
3082 no_error_pending =
3083 check_errors &&
3084 (gr_gpc0_tpc0_sm0_hww_warp_esr_error_v(warp_esr) ==
3085 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_v()) &&
3086 ((global_esr & global_esr_mask) == 0);
3087
3088 if (locked_down) {
3089 /*
3090 * if SM reports locked down, it means that SM is idle and
3091 * trapped, and also that one of these conditions is true:
3092 * 1) sm is nonempty and all valid warps are paused
3093 * 2) sm is empty and held in trapped state due to stop trigger
3094 * 3) sm is nonempty and some warps are not paused, but are
3095 * instead held at RTT due to an "active" stop trigger
3096 * Check for Paused warp mask != Valid
3097 * warp mask after SM reports it is locked down in order to
3098 * distinguish case 1 from case 3. When case 3 is detected,
3099 * it implies a misprogrammed trap handler code, as all warps
3100 * in the handler must promise to BPT.PAUSE instead of RTT
3101 * whenever SR64 read in trap mode indicates stop trigger
3102 * is asserted.
3103 */
3104 gv11b_gr_sm_dump_warp_bpt_pause_trap_mask_regs(g,
3105 offset, false);
3106 }
3107
3108 if (locked_down || no_error_pending) {
3109 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
3110 "GPC%d TPC%d: locked down SM%d", gpc, tpc, sm);
3111 return 0;
3112 }
3113
3114 /* if an mmu fault is pending and mmu debug mode is not
3115 * enabled, the sm will never lock down.
3116 */
3117 if (!mmu_debug_mode_enabled &&
3118 (g->ops.mm.mmu_fault_pending(g))) {
3119 nvgpu_err(g,
3120 "GPC%d TPC%d: mmu fault pending,"
3121 " SM%d will never lock down!", gpc, tpc, sm);
3122 return -EFAULT;
3123 }
3124
3125 nvgpu_usleep_range(delay, delay * 2);
3126 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
3127 } while (!nvgpu_timeout_expired(&timeout));
3128
3129 nvgpu_err(g, "GPC%d TPC%d: timed out while trying to "
3130 "lock down SM%d", gpc, tpc, sm);
3131 gv11b_gr_sm_dump_warp_bpt_pause_trap_mask_regs(g, offset, true);
3132
3133 return -ETIMEDOUT;
3134}
3135
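/*
 * Assert the stop trigger in the SM's DBGR_CONTROL0 and then wait for the SM
 * to report lock down through the wait_for_sm_lock_down HAL op. Illustrative
 * call only (the real callers are the SM exception paths); the mask and
 * check_errors flag depend on the caller:
 *
 *	err = g->ops.gr.lock_down_sm(g, gpc, tpc, sm, global_esr_mask, true);
 */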
3136int gv11b_gr_lock_down_sm(struct gk20a *g,
3137 u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
3138 bool check_errors)
3139{
3140 u32 dbgr_control0;
3141 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) +
3142 gv11b_gr_sm_offset(g, sm);
3143
3144 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
3145 "GPC%d TPC%d SM%d: assert stop trigger", gpc, tpc, sm);
3146
3147 /* assert stop trigger */
3148 dbgr_control0 =
3149 gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset);
3150 dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f();
3151 gk20a_writel(g,
3152 gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset, dbgr_control0);
3153
3154 return g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, global_esr_mask,
3155 check_errors);
3156}
3157
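/*
 * Clear the SM's hardware warning state: write the supplied global_esr value
 * back to HWW_GLOBAL_ESR and zero HWW_WARP_ESR, logging the register contents
 * after each write.
 */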
3158void gv11b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
3159 u32 global_esr)
3160{
3161 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) +
3162 gv11b_gr_sm_offset(g, sm);
3163
3164 gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset,
3165 global_esr);
3166 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
3167 "Cleared HWW global esr, current reg val: 0x%x",
3168 gk20a_readl(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() +
3169 offset));
3170
3171 gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0);
3172 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
3173 "Cleared HWW warp esr, current reg val: 0x%x",
3174 gk20a_readl(g, gr_gpc0_tpc0_sm0_hww_warp_esr_r() +
3175 offset));
3176}
3177
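/*
 * Handle an MPC exception for the given GPC/TPC: bail out if the TPC
 * exception register does not flag MPC, otherwise log the MPC HWW ESR and the
 * VEID reported by the ESR info register, then reset the ESR.
 */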
3178int gr_gv11b_handle_tpc_mpc_exception(struct gk20a *g,
3179 u32 gpc, u32 tpc, bool *post_event)
3180{
3181 u32 esr;
3182 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
3183 u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r()
3184 + offset);
3185
3186 if (!(tpc_exception & gr_gpc0_tpc0_tpccs_tpc_exception_mpc_m()))
3187 return 0;
3188
3189 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
3190 "GPC%d TPC%d MPC exception", gpc, tpc);
3191
3192 esr = gk20a_readl(g, gr_gpc0_tpc0_mpc_hww_esr_r() + offset);
3193 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "mpc hww esr 0x%08x", esr);
3194
3195 esr = gk20a_readl(g, gr_gpc0_tpc0_mpc_hww_esr_info_r() + offset);
3196 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
3197 "mpc hww esr info: veid 0x%08x",
3198 gr_gpc0_tpc0_mpc_hww_esr_info_veid_v(esr));
3199
3200 gk20a_writel(g, gr_gpc0_tpc0_mpc_hww_esr_r() + offset,
3201 gr_gpc0_tpc0_mpc_hww_esr_reset_trigger_f());
3202
3203 return 0;
3204}
3205
3206static const u32 _num_ovr_perf_regs = 20;
3207static u32 _ovr_perf_regs[20] = { 0, };
3208
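/*
 * One-time initialization of the override perf register table; entry 0 also
 * serves as the "already initialized" flag.
 */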
3209void gv11b_gr_init_ovr_sm_dsm_perf(void)
3210{
3211 if (_ovr_perf_regs[0] != 0)
3212 return;
3213
3214 _ovr_perf_regs[0] = gr_egpc0_etpc0_sm_dsm_perf_counter_control_sel0_r();
3215 _ovr_perf_regs[1] = gr_egpc0_etpc0_sm_dsm_perf_counter_control_sel1_r();
3216 _ovr_perf_regs[2] = gr_egpc0_etpc0_sm_dsm_perf_counter_control0_r();
3217 _ovr_perf_regs[3] = gr_egpc0_etpc0_sm_dsm_perf_counter_control1_r();
3218 _ovr_perf_regs[4] = gr_egpc0_etpc0_sm_dsm_perf_counter_control2_r();
3219 _ovr_perf_regs[5] = gr_egpc0_etpc0_sm_dsm_perf_counter_control3_r();
3220 _ovr_perf_regs[6] = gr_egpc0_etpc0_sm_dsm_perf_counter_control4_r();
3221 _ovr_perf_regs[7] = gr_egpc0_etpc0_sm_dsm_perf_counter_control5_r();
3222 _ovr_perf_regs[8] = gr_egpc0_etpc0_sm_dsm_perf_counter0_control_r();
3223 _ovr_perf_regs[9] = gr_egpc0_etpc0_sm_dsm_perf_counter1_control_r();
3224 _ovr_perf_regs[10] = gr_egpc0_etpc0_sm_dsm_perf_counter2_control_r();
3225 _ovr_perf_regs[11] = gr_egpc0_etpc0_sm_dsm_perf_counter3_control_r();
3226 _ovr_perf_regs[12] = gr_egpc0_etpc0_sm_dsm_perf_counter4_control_r();
3227 _ovr_perf_regs[13] = gr_egpc0_etpc0_sm_dsm_perf_counter5_control_r();
3228 _ovr_perf_regs[14] = gr_egpc0_etpc0_sm_dsm_perf_counter6_control_r();
3229 _ovr_perf_regs[15] = gr_egpc0_etpc0_sm_dsm_perf_counter7_control_r();
3230
3231 _ovr_perf_regs[16] = gr_egpc0_etpc0_sm0_dsm_perf_counter4_r();
3232 _ovr_perf_regs[17] = gr_egpc0_etpc0_sm0_dsm_perf_counter5_r();
3233 _ovr_perf_regs[18] = gr_egpc0_etpc0_sm0_dsm_perf_counter6_r();
3234 _ovr_perf_regs[19] = gr_egpc0_etpc0_sm0_dsm_perf_counter7_r();
3235}
3236
3237/* Following are the blocks of registers that the ucode
3238 * stores in the extended region.
3239 */
3240 /* == ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v() ? */
3241 static const u32 _num_sm_dsm_perf_regs = 0;
3242 /* == ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v() ? */
3243 static const u32 _num_sm_dsm_perf_ctrl_regs = 2;
3244static u32 *_sm_dsm_perf_regs;
3245static u32 _sm_dsm_perf_ctrl_regs[2];
3246
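/*
 * One-time initialization of the SM DSM perf control register table, guarded
 * the same way by its first entry.
 */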
3247void gv11b_gr_init_sm_dsm_reg_info(void)
3248{
3249 if (_sm_dsm_perf_ctrl_regs[0] != 0)
3250 return;
3251
3252 _sm_dsm_perf_ctrl_regs[0] =
3253 gr_egpc0_etpc0_sm_dsm_perf_counter_control0_r();
3254 _sm_dsm_perf_ctrl_regs[1] =
3255 gr_egpc0_etpc0_sm_dsm_perf_counter_control5_r();
3256}
3257
3258void gv11b_gr_get_sm_dsm_perf_regs(struct gk20a *g,
3259 u32 *num_sm_dsm_perf_regs,
3260 u32 **sm_dsm_perf_regs,
3261 u32 *perf_register_stride)
3262{
3263 *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs;
3264 *sm_dsm_perf_regs = _sm_dsm_perf_regs;
3265 *perf_register_stride =
3266 ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v();
3267}
3268
3269void gv11b_gr_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
3270 u32 *num_sm_dsm_perf_ctrl_regs,
3271 u32 **sm_dsm_perf_ctrl_regs,
3272 u32 *ctrl_register_stride)
3273{
3274 *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs;
3275 *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs;
3276 *ctrl_register_stride =
3277 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
3278}
3279
3280void gv11b_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
3281 u32 **ovr_perf_regs)
3282{
3283 *num_ovr_perf_regs = _num_ovr_perf_regs;
3284 *ovr_perf_regs = _ovr_perf_regs;
3285}
3286
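/*
 * Route SMPC register reads to the requested quad/half: bit 0 of "quad"
 * selects the quad, bit 1 selects the half. Each selection is read from the
 * unicast GPC/TPC control register and written back through the broadcast
 * (gpcs_tpcs) register so every TPC uses the same setting.
 */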
3287void gv11b_gr_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset)
3288{
3289 u32 reg_val;
3290 u32 quad_ctrl;
3291 u32 half_ctrl;
3292 u32 tpc, gpc;
3293 u32 gpc_tpc_addr;
3294 u32 gpc_tpc_stride;
3295 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
3296 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
3297 GPU_LIT_TPC_IN_GPC_STRIDE);
3298
3299 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset);
3300
3301 gpc = pri_get_gpc_num(g, offset);
3302 gpc_tpc_addr = pri_gpccs_addr_mask(offset);
3303 tpc = g->ops.gr.get_tpc_num(g, gpc_tpc_addr);
3304
3305 quad_ctrl = quad & 0x1; /* first bit tells us quad */
3306 half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */
3307
3308 gpc_tpc_stride = gpc * gpc_stride + tpc * tpc_in_gpc_stride;
3309 gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride;
3310
3311 /* read from unicast reg */
3312 reg_val = gk20a_readl(g, gpc_tpc_addr);
3313 reg_val = set_field(reg_val,
3314 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(),
3315 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(quad_ctrl));
3316
3317 /* write to broadcast reg */
3318 gk20a_writel(g, gr_gpcs_tpcs_sm_halfctl_ctrl_r(), reg_val);
3319
3320 gpc_tpc_addr = gr_gpc0_tpc0_sm_debug_sfe_control_r() + gpc_tpc_stride;
3321 reg_val = gk20a_readl(g, gpc_tpc_addr);
3322 reg_val = set_field(reg_val,
3323 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(),
3324 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(half_ctrl));
3325
3326 /* write to broadcast reg */
3327 gk20a_writel(g, gr_gpcs_tpcs_sm_debug_sfe_control_r(), reg_val);
3328}
3329
3330static bool pri_is_egpc_addr_shared(struct gk20a *g, u32 addr)
3331{
3332 u32 egpc_shared_base = EGPC_PRI_SHARED_BASE;
3333 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
3334
3335 return (addr >= egpc_shared_base) &&
3336 (addr < egpc_shared_base + gpc_stride);
3337}
3338
3339bool gv11b_gr_pri_is_egpc_addr(struct gk20a *g, u32 addr)
3340{
3341 u32 egpc_base = g->ops.gr.get_egpc_base(g);
3342 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
3343 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
3344
3345 return ((addr >= egpc_base) &&
3346 (addr < egpc_base + num_gpcs * gpc_stride)) ||
3347 pri_is_egpc_addr_shared(g, addr);
3348}
3349
3350static inline u32 pri_smpc_in_etpc_addr_mask(struct gk20a *g, u32 addr)
3351{
3352 u32 smpc_stride = nvgpu_get_litter_value(g,
3353 GPU_LIT_SMPC_PRI_STRIDE);
3354
3355 return (addr & (smpc_stride - 1));
3356}
3357
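/*
 * Build the per-SM unicast SMPC address for (gpc_num, tpc_num, sm_num) from
 * the EGPC base, the GPC/TPC/SMPC strides and the SMPC unique base.
 */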
3358static u32 pri_smpc_ext_addr(struct gk20a *g, u32 sm_offset, u32 gpc_num,
3359 u32 tpc_num, u32 sm_num)
3360{
3361 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
3362 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g,
3363 GPU_LIT_TPC_IN_GPC_BASE);
3364 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
3365 GPU_LIT_TPC_IN_GPC_STRIDE);
3366 u32 egpc_base = g->ops.gr.get_egpc_base(g);
3367 u32 smpc_unique_base = nvgpu_get_litter_value(g,
3368 GPU_LIT_SMPC_PRI_UNIQUE_BASE);
3369 u32 smpc_stride = nvgpu_get_litter_value(g,
3370 GPU_LIT_SMPC_PRI_STRIDE);
3371
3372 return (egpc_base + (gpc_num * gpc_stride) + tpc_in_gpc_base +
3373 (tpc_num * tpc_in_gpc_stride) +
3374 (sm_num * smpc_stride) +
3375 (smpc_unique_base + sm_offset));
3376}
3377
3378static bool pri_is_smpc_addr_in_etpc_shared(struct gk20a *g, u32 addr)
3379{
3380 u32 smpc_shared_base = nvgpu_get_litter_value(g,
3381 GPU_LIT_SMPC_PRI_SHARED_BASE);
3382 u32 smpc_stride = nvgpu_get_litter_value(g,
3383 GPU_LIT_SMPC_PRI_STRIDE);
3384
3385 return (addr >= smpc_shared_base) &&
3386 (addr < smpc_shared_base + smpc_stride);
3387}
3388
3389bool gv11b_gr_pri_is_etpc_addr(struct gk20a *g, u32 addr)
3390{
3391 u32 egpc_addr = 0;
3392
3393 if (g->ops.gr.is_egpc_addr(g, addr)) {
3394 egpc_addr = pri_gpccs_addr_mask(addr);
3395 if (g->ops.gr.is_tpc_addr(g, egpc_addr))
3396 return true;
3397 }
3398
3399 return false;
3400}
3401
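/*
 * Find which GPC's EGPC window an address falls into by linear search;
 * defaults to GPC 0 when no window matches.
 */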
3402static u32 pri_get_egpc_num(struct gk20a *g, u32 addr)
3403{
3404 u32 i, start;
3405 u32 egpc_base = g->ops.gr.get_egpc_base(g);
3406 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
3407 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
3408
3409 for (i = 0; i < num_gpcs; i++) {
3410 start = egpc_base + (i * gpc_stride);
3411 if ((addr >= start) && (addr < (start + gpc_stride)))
3412 return i;
3413 }
3414 return 0;
3415}
3416
3417static u32 pri_egpc_addr(struct gk20a *g, u32 addr, u32 gpc)
3418{
3419 u32 egpc_base = g->ops.gr.get_egpc_base(g);
3420 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
3421
3422 return egpc_base + (gpc * gpc_stride) + addr;
3423}
3424
3425static u32 pri_etpc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 tpc)
3426{
3427 u32 egpc_base = g->ops.gr.get_egpc_base(g);
3428 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
3429 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g,
3430 GPU_LIT_TPC_IN_GPC_BASE);
3431 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
3432 GPU_LIT_TPC_IN_GPC_STRIDE);
3433
3434 return egpc_base + (gpc * gpc_stride) +
3435 tpc_in_gpc_base + (tpc * tpc_in_gpc_stride) +
3436 addr;
3437}
3438
3439void gv11b_gr_get_egpc_etpc_num(struct gk20a *g, u32 addr,
3440 u32 *egpc_num, u32 *etpc_num)
3441{
3442 u32 egpc_addr = 0;
3443
3444 *egpc_num = pri_get_egpc_num(g, addr);
3445 egpc_addr = pri_gpccs_addr_mask(addr);
3446 *etpc_num = g->ops.gr.get_tpc_num(g, egpc_addr);
3447
3448 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
3449 "egpc_num = %d etpc_num = %d", *egpc_num, *etpc_num);
3450}
3451
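/*
 * Classify a priv address in EGPC space: set addr_type to EGPC or ETPC,
 * extract the GPC/TPC numbers for unicast addresses, and set the
 * EGPC/ETPC/SMPC broadcast flags for shared addresses. Returns -EINVAL if the
 * address is not an EGPC address at all.
 */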
3452int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type,
3453 u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags)
3454{
3455 u32 gpc_addr;
3456 u32 tpc_addr;
3457
3458 if (g->ops.gr.is_egpc_addr(g, addr)) {
3459 nvgpu_log_info(g, "addr=0x%x is egpc", addr);
3460
3461 *addr_type = CTXSW_ADDR_TYPE_EGPC;
3462 gpc_addr = pri_gpccs_addr_mask(addr);
3463 if (pri_is_egpc_addr_shared(g, addr)) {
3464 *broadcast_flags |= PRI_BROADCAST_FLAGS_EGPC;
3465 *gpc_num = 0;
3466 nvgpu_log_info(g, "shared egpc");
3467 } else {
3468 *gpc_num = pri_get_egpc_num(g, addr);
3469 nvgpu_log_info(g, "gpc=0x%x", *gpc_num);
3470 }
3471 if (g->ops.gr.is_tpc_addr(g, gpc_addr)) {
3472 nvgpu_log_info(g, "addr=0x%x is etpc", addr);
3473 *addr_type = CTXSW_ADDR_TYPE_ETPC;
3474 if (pri_is_tpc_addr_shared(g, gpc_addr)) {
3475 *broadcast_flags |= PRI_BROADCAST_FLAGS_ETPC;
3476 *tpc_num = 0;
3477 nvgpu_log_info(g, "shared etpc");
3478 } else {
3479 *tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
3480 nvgpu_log_info(g, "tpc=0x%x", *tpc_num);
3481 }
3482 tpc_addr = pri_tpccs_addr_mask(addr);
3483 if (pri_is_smpc_addr_in_etpc_shared(g, tpc_addr))
3484 *broadcast_flags |= PRI_BROADCAST_FLAGS_SMPC;
3485 }
3486
3487 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
3488 "addr_type = %d, broadcast_flags = %#08x",
3489 *addr_type, *broadcast_flags);
3490 return 0;
3491 }
3492 return -EINVAL;
3493}
3494
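/*
 * Expand an SMPC broadcast address into one unicast priv table entry per SM
 * of the given GPC/TPC.
 */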
3495static void gv11b_gr_update_priv_addr_table_smpc(struct gk20a *g, u32 gpc_num,
3496 u32 tpc_num, u32 addr,
3497 u32 *priv_addr_table, u32 *t)
3498{
3499 u32 sm_per_tpc, sm_num;
3500
3501 nvgpu_log_info(g, "broadcast flags smpc");
3502
3503 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
3504 for (sm_num = 0; sm_num < sm_per_tpc; sm_num++) {
3505 priv_addr_table[*t] = pri_smpc_ext_addr(g,
3506 pri_smpc_in_etpc_addr_mask(g, addr),
3507 gpc_num, tpc_num, sm_num);
3508 nvgpu_log_info(g, "priv_addr_table[%d]:%#08x",
3509 *t, priv_addr_table[*t]);
3510 (*t)++;
3511 }
3512}
3513
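/*
 * Expand an EGPC/ETPC/SMPC broadcast address into the corresponding unicast
 * priv addresses, appending entries at index *t. The loops that run are
 * selected by the broadcast flags produced by gv11b_gr_decode_egpc_addr().
 * Minimal illustrative sequence (the actual caller lives in the common gr
 * code, which also sizes the table):
 *
 *	u32 t = 0, gpc = 0, tpc = 0, flags = 0;
 *	int type = 0;
 *
 *	if (gv11b_gr_decode_egpc_addr(g, addr, &type, &gpc, &tpc, &flags) == 0)
 *		gv11b_gr_egpc_etpc_priv_addr_table(g, addr, gpc, flags,
 *						priv_addr_table, &t);
 */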
3514void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr,
3515 u32 gpc, u32 broadcast_flags, u32 *priv_addr_table, u32 *t)
3516{
3517 u32 gpc_num, tpc_num;
3518
3519 nvgpu_log_info(g, "addr=0x%x", addr);
3520
3521 /* The GPC/TPC unicast registers are included in the compressed PRI
3522 * tables. Convert a GPC/TPC broadcast address to unicast addresses so
3523 * that we can look up the offsets.
3524 */
3525 if (broadcast_flags & PRI_BROADCAST_FLAGS_EGPC) {
3526 nvgpu_log_info(g, "broadcast flags egpc");
3527 for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
3528
3529 if (broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) {
3530 nvgpu_log_info(g, "broadcast flags etpc");
3531 for (tpc_num = 0;
3532 tpc_num < g->gr.gpc_tpc_count[gpc_num];
3533 tpc_num++) {
3534 if (broadcast_flags &
3535 PRI_BROADCAST_FLAGS_SMPC) {
3536 gv11b_gr_update_priv_addr_table_smpc(
3537 g, gpc_num, tpc_num, addr,
3538 priv_addr_table, t);
3539 } else {
3540 priv_addr_table[*t] =
3541 pri_etpc_addr(g,
3542 pri_tpccs_addr_mask(addr),
3543 gpc_num, tpc_num);
3544 nvgpu_log_info(g,
3545 "priv_addr_table[%d]:%#08x",
3546 *t, priv_addr_table[*t]);
3547 (*t)++;
3548 }
3549 }
3550 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) {
3551 tpc_num = 0;
3552 gv11b_gr_update_priv_addr_table_smpc(
3553 g, gpc_num, tpc_num, addr,
3554 priv_addr_table, t);
3555 } else {
3556 priv_addr_table[*t] =
3557 pri_egpc_addr(g,
3558 pri_gpccs_addr_mask(addr),
3559 gpc_num);
3560 nvgpu_log_info(g, "priv_addr_table[%d]:%#08x",
3561 *t, priv_addr_table[*t]);
3562 (*t)++;
3563 }
3564 }
3565 	} else {
3566 if (broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) {
3567 nvgpu_log_info(g, "broadcast flags etpc but not egpc");
3568 gpc_num = 0;
3569 for (tpc_num = 0;
3570 tpc_num < g->gr.gpc_tpc_count[gpc];
3571 tpc_num++) {
3572 				if (broadcast_flags &
3573 				     PRI_BROADCAST_FLAGS_SMPC) {
3574 					gv11b_gr_update_priv_addr_table_smpc(
3575 						g, gpc_num, tpc_num, addr,
3576 						priv_addr_table, t);
3577 				} else {
3578 priv_addr_table[*t] =
3579 pri_etpc_addr(g,
3580 pri_tpccs_addr_mask(addr),
3581 gpc, tpc_num);
3582 nvgpu_log_info(g,
3583 "priv_addr_table[%d]:%#08x",
3584 *t, priv_addr_table[*t]);
3585 (*t)++;
3586 }
3587 }
3588 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) {
3589 tpc_num = 0;
3590 gpc_num = 0;
3591 gv11b_gr_update_priv_addr_table_smpc(
3592 g, gpc_num, tpc_num, addr,
3593 priv_addr_table, t);
3594 } else {
3595 priv_addr_table[*t] = addr;
3596 nvgpu_log_info(g, "priv_addr_table[%d]:%#08x",
3597 *t, priv_addr_table[*t]);
3598 (*t)++;
3599 }
3600 }
3601}
3602
3603u32 gv11b_gr_get_egpc_base(struct gk20a *g)
3604{
3605 return EGPC_PRI_BASE;
3606}
3607
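/*
 * Mirror the FB MMU configuration into the GPC MMU: copy the relevant
 * fb_mmu_ctrl fields and the MMU debug control/read/write buffer registers,
 * and clear the PM unit and request masks.
 */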
3608void gr_gv11b_init_gpc_mmu(struct gk20a *g)
3609{
3610 u32 temp;
3611
3612 nvgpu_log_info(g, "initialize gpc mmu");
3613
3614 if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
3615 		/* Bypass MMU check for non-secure boot. For
3616 		 * secure boot, this register write has no effect. */
3617 gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff);
3618 }
3619 temp = gk20a_readl(g, fb_mmu_ctrl_r());
3620 temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() |
3621 gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() |
3622 gr_gpcs_pri_mmu_ctrl_vol_fault_m() |
3623 gr_gpcs_pri_mmu_ctrl_comp_fault_m() |
3624 gr_gpcs_pri_mmu_ctrl_miss_gran_m() |
3625 gr_gpcs_pri_mmu_ctrl_cache_mode_m() |
3626 gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |
3627 gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |
3628 gr_gpcs_pri_mmu_ctrl_mmu_disable_m();
3629 gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp);
3630 gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);
3631 gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);
3632
3633 gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(),
3634 gk20a_readl(g, fb_mmu_debug_ctrl_r()));
3635 gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(),
3636 gk20a_readl(g, fb_mmu_debug_wr_r()));
3637 gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(),
3638 gk20a_readl(g, fb_mmu_debug_rd_r()));
3639}