Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 3639
1 file changed, 3639 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
new file mode 100644
index 00000000..3d817d7e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -0,0 +1,3639 @@
1 | /* | ||
2 | * GV11b GPU GR | ||
3 | * | ||
4 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <linux/delay.h> | ||
26 | #include <linux/version.h> | ||
27 | #include <linux/vmalloc.h> | ||
28 | #include <linux/tegra_gpu_t19x.h> | ||
29 | #include <uapi/linux/nvgpu.h> | ||
30 | |||
31 | #include <soc/tegra/fuse.h> | ||
32 | |||
33 | #include <nvgpu/timers.h> | ||
34 | #include <nvgpu/gmmu.h> | ||
35 | #include <nvgpu/dma.h> | ||
36 | #include <nvgpu/log.h> | ||
37 | #include <nvgpu/debug.h> | ||
38 | #include <nvgpu/enabled.h> | ||
39 | |||
40 | #include "gk20a/gk20a.h" | ||
41 | #include "gk20a/gr_gk20a.h" | ||
42 | #include "gk20a/dbg_gpu_gk20a.h" | ||
43 | #include "gk20a/regops_gk20a.h" | ||
44 | #include "gk20a/gr_pri_gk20a.h" | ||
45 | |||
46 | #include "gm20b/gr_gm20b.h" | ||
47 | |||
48 | #include "gp10b/gr_gp10b.h" | ||
49 | |||
50 | #include "gv11b/gr_gv11b.h" | ||
51 | #include "gv11b/mm_gv11b.h" | ||
52 | #include "gv11b/subctx_gv11b.h" | ||
53 | |||
54 | #include <nvgpu/hw/gv11b/hw_gr_gv11b.h> | ||
55 | #include <nvgpu/hw/gv11b/hw_fifo_gv11b.h> | ||
56 | #include <nvgpu/hw/gv11b/hw_proj_gv11b.h> | ||
57 | #include <nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h> | ||
58 | #include <nvgpu/hw/gv11b/hw_mc_gv11b.h> | ||
59 | #include <nvgpu/hw/gv11b/hw_ram_gv11b.h> | ||
60 | #include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h> | ||
61 | #include <nvgpu/hw/gv11b/hw_therm_gv11b.h> | ||
62 | #include <nvgpu/hw/gv11b/hw_fb_gv11b.h> | ||
63 | |||
64 | bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) | ||
65 | { | ||
66 | bool valid = false; | ||
67 | |||
68 | switch (class_num) { | ||
69 | case VOLTA_COMPUTE_A: | ||
70 | case VOLTA_A: | ||
71 | case VOLTA_DMA_COPY_A: | ||
72 | valid = true; | ||
73 | break; | ||
74 | |||
75 | case MAXWELL_COMPUTE_B: | ||
76 | case MAXWELL_B: | ||
77 | case FERMI_TWOD_A: | ||
78 | case KEPLER_DMA_COPY_A: | ||
79 | case MAXWELL_DMA_COPY_A: | ||
80 | case PASCAL_COMPUTE_A: | ||
81 | case PASCAL_A: | ||
82 | case PASCAL_DMA_COPY_A: | ||
83 | valid = true; | ||
84 | break; | ||
85 | |||
86 | default: | ||
87 | break; | ||
88 | } | ||
89 | gk20a_dbg_info("class=0x%x valid=%d", class_num, valid); | ||
90 | return valid; | ||
91 | } | ||
92 | |||
93 | bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num) | ||
94 | { | ||
95 | bool valid = false; | ||
96 | |||
97 | switch (class_num) { | ||
98 | case VOLTA_A: | ||
99 | case PASCAL_A: | ||
100 | case MAXWELL_B: | ||
101 | valid = true; | ||
102 | break; | ||
103 | |||
104 | default: | ||
105 | break; | ||
106 | } | ||
107 | return valid; | ||
108 | } | ||
109 | |||
110 | bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num) | ||
111 | { | ||
112 | bool valid = false; | ||
113 | |||
114 | switch (class_num) { | ||
115 | case VOLTA_COMPUTE_A: | ||
116 | case PASCAL_COMPUTE_A: | ||
117 | case MAXWELL_COMPUTE_B: | ||
118 | valid = true; | ||
119 | break; | ||
120 | |||
121 | default: | ||
122 | break; | ||
123 | } | ||
124 | return valid; | ||
125 | } | ||
126 | |||
127 | static u32 gv11b_gr_sm_offset(struct gk20a *g, u32 sm) | ||
128 | { | ||
129 | |||
130 | u32 sm_pri_stride = nvgpu_get_litter_value(g, GPU_LIT_SM_PRI_STRIDE); | ||
131 | u32 sm_offset = sm_pri_stride * sm; | ||
132 | |||
133 | return sm_offset; | ||
134 | } | ||
135 | |||
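/*
 * Note: the per-SM ECC handlers below (L1 tag, LRF, CBU, L1 data, icache)
 * share one pattern: read the unit's ECC status register, split it into
 * corrected/uncorrected error masks, read the 16-bit total error counters,
 * fold any counter-overflow bit back into the delta (delta += overflow <<
 * count_total_s()), accumulate the delta into the matching g->ecc.gr
 * counter for this TPC, then clear the counters and reset the status.
 */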
136 | static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, | ||
137 | bool *post_event, struct channel_gk20a *fault_ch, | ||
138 | u32 *hww_global_esr) | ||
139 | { | ||
140 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
141 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
142 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
143 | u32 l1_tag_ecc_status, l1_tag_ecc_corrected_err_status = 0; | ||
144 | u32 l1_tag_ecc_uncorrected_err_status = 0; | ||
145 | u32 l1_tag_corrected_err_count_delta = 0; | ||
146 | u32 l1_tag_uncorrected_err_count_delta = 0; | ||
147 | bool is_l1_tag_ecc_corrected_total_err_overflow = 0; | ||
148 | bool is_l1_tag_ecc_uncorrected_total_err_overflow = 0; | ||
149 | |||
150 | /* Check for L1 tag ECC errors. */ | ||
151 | l1_tag_ecc_status = gk20a_readl(g, | ||
152 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_r() + offset); | ||
153 | l1_tag_ecc_corrected_err_status = l1_tag_ecc_status & | ||
154 | (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_0_m() | | ||
155 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_1_m() | | ||
156 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_pixrpf_m() | | ||
157 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_miss_fifo_m()); | ||
158 | l1_tag_ecc_uncorrected_err_status = l1_tag_ecc_status & | ||
159 | (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_0_m() | | ||
160 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_1_m() | | ||
161 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_pixrpf_m() | | ||
162 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_miss_fifo_m()); | ||
163 | |||
164 | if ((l1_tag_ecc_corrected_err_status == 0) && (l1_tag_ecc_uncorrected_err_status == 0)) | ||
165 | return 0; | ||
166 | |||
167 | l1_tag_corrected_err_count_delta = | ||
168 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_v( | ||
169 | gk20a_readl(g, | ||
170 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + | ||
171 | offset)); | ||
172 | l1_tag_uncorrected_err_count_delta = | ||
173 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_v( | ||
174 | gk20a_readl(g, | ||
175 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + | ||
176 | offset)); | ||
177 | is_l1_tag_ecc_corrected_total_err_overflow = | ||
178 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_total_counter_overflow_v(l1_tag_ecc_status); | ||
179 | is_l1_tag_ecc_uncorrected_total_err_overflow = | ||
180 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_total_counter_overflow_v(l1_tag_ecc_status); | ||
181 | |||
182 | if ((l1_tag_corrected_err_count_delta > 0) || is_l1_tag_ecc_corrected_total_err_overflow) { | ||
183 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, | ||
184 | "corrected error (SBE) detected in SM L1 tag! err_mask [%08x] is_overf [%d]", | ||
185 | l1_tag_ecc_corrected_err_status, is_l1_tag_ecc_corrected_total_err_overflow); | ||
186 | |||
187 | /* HW uses a 16-bit counter */ | ||
188 | l1_tag_corrected_err_count_delta += | ||
189 | (is_l1_tag_ecc_corrected_total_err_overflow << | ||
190 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s()); | ||
191 | g->ecc.gr.t19x.sm_l1_tag_corrected_err_count.counters[tpc] += | ||
192 | l1_tag_corrected_err_count_delta; | ||
193 | gk20a_writel(g, | ||
194 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, | ||
195 | 0); | ||
196 | } | ||
197 | if ((l1_tag_uncorrected_err_count_delta > 0) || is_l1_tag_ecc_uncorrected_total_err_overflow) { | ||
198 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, | ||
199 | "Uncorrected error (DBE) detected in SM L1 tag! err_mask [%08x] is_overf [%d]", | ||
200 | l1_tag_ecc_uncorrected_err_status, is_l1_tag_ecc_uncorrected_total_err_overflow); | ||
201 | |||
202 | /* HW uses a 16-bit counter */ | ||
203 | l1_tag_uncorrected_err_count_delta += | ||
204 | (is_l1_tag_ecc_uncorrected_total_err_overflow << | ||
205 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s()); | ||
206 | g->ecc.gr.t19x.sm_l1_tag_uncorrected_err_count.counters[tpc] += | ||
207 | l1_tag_uncorrected_err_count_delta; | ||
208 | gk20a_writel(g, | ||
209 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset, | ||
210 | 0); | ||
211 | } | ||
212 | |||
213 | gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_r() + offset, | ||
214 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_reset_task_f()); | ||
215 | |||
216 | return 0; | ||
217 | |||
218 | } | ||
219 | |||
220 | static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, | ||
221 | bool *post_event, struct channel_gk20a *fault_ch, | ||
222 | u32 *hww_global_esr) | ||
223 | { | ||
224 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
225 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
226 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
227 | u32 lrf_ecc_status, lrf_ecc_corrected_err_status = 0; | ||
228 | u32 lrf_ecc_uncorrected_err_status = 0; | ||
229 | u32 lrf_corrected_err_count_delta = 0; | ||
230 | u32 lrf_uncorrected_err_count_delta = 0; | ||
231 | bool is_lrf_ecc_corrected_total_err_overflow = 0; | ||
232 | bool is_lrf_ecc_uncorrected_total_err_overflow = 0; | ||
233 | |||
234 | /* Check for LRF ECC errors. */ | ||
235 | lrf_ecc_status = gk20a_readl(g, | ||
236 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset); | ||
237 | lrf_ecc_corrected_err_status = lrf_ecc_status & | ||
238 | (gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp0_m() | | ||
239 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp1_m() | | ||
240 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp2_m() | | ||
241 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp3_m() | | ||
242 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp4_m() | | ||
243 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp5_m() | | ||
244 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp6_m() | | ||
245 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_qrfdp7_m()); | ||
246 | lrf_ecc_uncorrected_err_status = lrf_ecc_status & | ||
247 | (gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp0_m() | | ||
248 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp1_m() | | ||
249 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp2_m() | | ||
250 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp3_m() | | ||
251 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp4_m() | | ||
252 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp5_m() | | ||
253 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp6_m() | | ||
254 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_qrfdp7_m()); | ||
255 | |||
256 | if ((lrf_ecc_corrected_err_status == 0) && (lrf_ecc_uncorrected_err_status == 0)) | ||
257 | return 0; | ||
258 | |||
259 | lrf_corrected_err_count_delta = | ||
260 | gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_v( | ||
261 | gk20a_readl(g, | ||
262 | gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + | ||
263 | offset)); | ||
264 | lrf_uncorrected_err_count_delta = | ||
265 | gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_v( | ||
266 | gk20a_readl(g, | ||
267 | gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + | ||
268 | offset)); | ||
269 | is_lrf_ecc_corrected_total_err_overflow = | ||
270 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_corrected_err_total_counter_overflow_v(lrf_ecc_status); | ||
271 | is_lrf_ecc_uncorrected_total_err_overflow = | ||
272 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_uncorrected_err_total_counter_overflow_v(lrf_ecc_status); | ||
273 | |||
274 | if ((lrf_corrected_err_count_delta > 0) || is_lrf_ecc_corrected_total_err_overflow) { | ||
275 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, | ||
276 | "corrected error (SBE) detected in SM LRF! err_mask [%08x] is_overf [%d]", | ||
277 | lrf_ecc_corrected_err_status, is_lrf_ecc_corrected_total_err_overflow); | ||
278 | |||
279 | /* HW uses a 16-bit counter */ | ||
280 | lrf_corrected_err_count_delta += | ||
281 | (is_lrf_ecc_corrected_total_err_overflow << | ||
282 | gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s()); | ||
283 | g->ecc.gr.t18x.sm_lrf_single_err_count.counters[tpc] += | ||
284 | lrf_corrected_err_count_delta; | ||
285 | gk20a_writel(g, | ||
286 | gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset, | ||
287 | 0); | ||
288 | } | ||
289 | if ((lrf_uncorrected_err_count_delta > 0) || is_lrf_ecc_uncorrected_total_err_overflow) { | ||
290 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, | ||
291 | "Uncorrected error (DBE) detected in SM LRF! err_mask [%08x] is_overf [%d]", | ||
292 | lrf_ecc_uncorrected_err_status, is_lrf_ecc_uncorrected_total_err_overflow); | ||
293 | |||
294 | /* HW uses a 16-bit counter */ | ||
295 | lrf_uncorrected_err_count_delta += | ||
296 | (is_lrf_ecc_uncorrected_total_err_overflow << | ||
297 | gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s()); | ||
298 | g->ecc.gr.t18x.sm_lrf_double_err_count.counters[tpc] += | ||
299 | lrf_uncorrected_err_count_delta; | ||
300 | gk20a_writel(g, | ||
301 | gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset, | ||
302 | 0); | ||
303 | } | ||
304 | |||
305 | gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, | ||
306 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_reset_task_f()); | ||
307 | |||
308 | return 0; | ||
309 | |||
310 | } | ||
311 | |||
312 | void gr_gv11b_enable_hww_exceptions(struct gk20a *g) | ||
313 | { | ||
314 | /* enable exceptions */ | ||
315 | gk20a_writel(g, gr_fe_hww_esr_r(), | ||
316 | gr_fe_hww_esr_en_enable_f() | | ||
317 | gr_fe_hww_esr_reset_active_f()); | ||
318 | gk20a_writel(g, gr_memfmt_hww_esr_r(), | ||
319 | gr_memfmt_hww_esr_en_enable_f() | | ||
320 | gr_memfmt_hww_esr_reset_active_f()); | ||
321 | } | ||
322 | |||
323 | void gr_gv11b_enable_exceptions(struct gk20a *g) | ||
324 | { | ||
325 | struct gr_gk20a *gr = &g->gr; | ||
326 | u32 reg_val; | ||
327 | |||
328 | /* | ||
329 | * clear exceptions : | ||
330 | * other than SM : hww_esr are reset in *enable_hww_exceptions* | ||
331 | * SM : cleared in *set_hww_esr_report_mask* | ||
332 | */ | ||
333 | |||
334 | /* enable exceptions */ | ||
335 | gk20a_writel(g, gr_exception2_en_r(), 0x0); /* BE not enabled */ | ||
336 | gk20a_writel(g, gr_exception1_en_r(), (1 << gr->gpc_count) - 1); | ||
337 | |||
338 | reg_val = gr_exception_en_fe_enabled_f() | | ||
339 | gr_exception_en_memfmt_enabled_f() | | ||
340 | gr_exception_en_ds_enabled_f() | | ||
341 | gr_exception_en_gpc_enabled_f(); | ||
342 | gk20a_writel(g, gr_exception_en_r(), reg_val); | ||
343 | |||
344 | } | ||
345 | |||
346 | static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, | ||
347 | bool *post_event, struct channel_gk20a *fault_ch, | ||
348 | u32 *hww_global_esr) | ||
349 | { | ||
350 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
351 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
352 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
353 | u32 cbu_ecc_status, cbu_ecc_corrected_err_status = 0; | ||
354 | u32 cbu_ecc_uncorrected_err_status = 0; | ||
355 | u32 cbu_corrected_err_count_delta = 0; | ||
356 | u32 cbu_uncorrected_err_count_delta = 0; | ||
357 | bool is_cbu_ecc_corrected_total_err_overflow = 0; | ||
358 | bool is_cbu_ecc_uncorrected_total_err_overflow = 0; | ||
359 | |||
360 | /* Check for CBU ECC errors. */ | ||
361 | cbu_ecc_status = gk20a_readl(g, | ||
362 | gr_pri_gpc0_tpc0_sm_cbu_ecc_status_r() + offset); | ||
363 | cbu_ecc_corrected_err_status = cbu_ecc_status & | ||
364 | (gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_warp_sm0_m() | | ||
365 | gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_warp_sm1_m() | | ||
366 | gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_barrier_sm0_m() | | ||
367 | gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_barrier_sm1_m()); | ||
368 | cbu_ecc_uncorrected_err_status = cbu_ecc_status & | ||
369 | (gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_warp_sm0_m() | | ||
370 | gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_warp_sm1_m() | | ||
371 | gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_barrier_sm0_m() | | ||
372 | gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_barrier_sm1_m()); | ||
373 | |||
374 | if ((cbu_ecc_corrected_err_status == 0) && (cbu_ecc_uncorrected_err_status == 0)) | ||
375 | return 0; | ||
376 | |||
377 | cbu_corrected_err_count_delta = | ||
378 | gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_v( | ||
379 | gk20a_readl(g, | ||
380 | gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + | ||
381 | offset)); | ||
382 | cbu_uncorrected_err_count_delta = | ||
383 | gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_v( | ||
384 | gk20a_readl(g, | ||
385 | gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + | ||
386 | offset)); | ||
387 | is_cbu_ecc_corrected_total_err_overflow = | ||
388 | gr_pri_gpc0_tpc0_sm_cbu_ecc_status_corrected_err_total_counter_overflow_v(cbu_ecc_status); | ||
389 | is_cbu_ecc_uncorrected_total_err_overflow = | ||
390 | gr_pri_gpc0_tpc0_sm_cbu_ecc_status_uncorrected_err_total_counter_overflow_v(cbu_ecc_status); | ||
391 | |||
392 | if ((cbu_corrected_err_count_delta > 0) || is_cbu_ecc_corrected_total_err_overflow) { | ||
393 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, | ||
394 | "corrected error (SBE) detected in SM CBU! err_mask [%08x] is_overf [%d]", | ||
395 | cbu_ecc_corrected_err_status, is_cbu_ecc_corrected_total_err_overflow); | ||
396 | |||
397 | /* HW uses a 16-bit counter */ | ||
398 | cbu_corrected_err_count_delta += | ||
399 | (is_cbu_ecc_corrected_total_err_overflow << | ||
400 | gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s()); | ||
401 | g->ecc.gr.t19x.sm_cbu_corrected_err_count.counters[tpc] += | ||
402 | cbu_corrected_err_count_delta; | ||
403 | gk20a_writel(g, | ||
404 | gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset, | ||
405 | 0); | ||
406 | } | ||
407 | if ((cbu_uncorrected_err_count_delta > 0) || is_cbu_ecc_uncorrected_total_err_overflow) { | ||
408 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, | ||
409 | "Uncorrected error (DBE) detected in SM CBU! err_mask [%08x] is_overf [%d]", | ||
410 | cbu_ecc_uncorrected_err_status, is_cbu_ecc_uncorrected_total_err_overflow); | ||
411 | |||
412 | /* HW uses a 16-bit counter */ | ||
413 | cbu_uncorrected_err_count_delta += | ||
414 | (is_cbu_ecc_uncorrected_total_err_overflow << | ||
415 | gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s()); | ||
416 | g->ecc.gr.t19x.sm_cbu_uncorrected_err_count.counters[tpc] += | ||
417 | cbu_uncorrected_err_count_delta; | ||
418 | gk20a_writel(g, | ||
419 | gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset, | ||
420 | 0); | ||
421 | } | ||
422 | |||
423 | gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cbu_ecc_status_r() + offset, | ||
424 | gr_pri_gpc0_tpc0_sm_cbu_ecc_status_reset_task_f()); | ||
425 | |||
426 | return 0; | ||
427 | |||
428 | } | ||
429 | |||
430 | static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, | ||
431 | bool *post_event, struct channel_gk20a *fault_ch, | ||
432 | u32 *hww_global_esr) | ||
433 | { | ||
434 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
435 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
436 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
437 | u32 l1_data_ecc_status, l1_data_ecc_corrected_err_status = 0; | ||
438 | u32 l1_data_ecc_uncorrected_err_status = 0; | ||
439 | u32 l1_data_corrected_err_count_delta = 0; | ||
440 | u32 l1_data_uncorrected_err_count_delta = 0; | ||
441 | bool is_l1_data_ecc_corrected_total_err_overflow = 0; | ||
442 | bool is_l1_data_ecc_uncorrected_total_err_overflow = 0; | ||
443 | |||
444 | /* Check for L1 data ECC errors. */ | ||
445 | l1_data_ecc_status = gk20a_readl(g, | ||
446 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_r() + offset); | ||
447 | l1_data_ecc_corrected_err_status = l1_data_ecc_status & | ||
448 | (gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_corrected_err_el1_0_m() | | ||
449 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_corrected_err_el1_1_m()); | ||
450 | l1_data_ecc_uncorrected_err_status = l1_data_ecc_status & | ||
451 | (gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_uncorrected_err_el1_0_m() | | ||
452 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_uncorrected_err_el1_1_m()); | ||
453 | |||
454 | if ((l1_data_ecc_corrected_err_status == 0) && (l1_data_ecc_uncorrected_err_status == 0)) | ||
455 | return 0; | ||
456 | |||
457 | l1_data_corrected_err_count_delta = | ||
458 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_v( | ||
459 | gk20a_readl(g, | ||
460 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + | ||
461 | offset)); | ||
462 | l1_data_uncorrected_err_count_delta = | ||
463 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_v( | ||
464 | gk20a_readl(g, | ||
465 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + | ||
466 | offset)); | ||
467 | is_l1_data_ecc_corrected_total_err_overflow = | ||
468 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_corrected_err_total_counter_overflow_v(l1_data_ecc_status); | ||
469 | is_l1_data_ecc_uncorrected_total_err_overflow = | ||
470 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_uncorrected_err_total_counter_overflow_v(l1_data_ecc_status); | ||
471 | |||
472 | if ((l1_data_corrected_err_count_delta > 0) || is_l1_data_ecc_corrected_total_err_overflow) { | ||
473 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, | ||
474 | "corrected error (SBE) detected in SM L1 data! err_mask [%08x] is_overf [%d]", | ||
475 | l1_data_ecc_corrected_err_status, is_l1_data_ecc_corrected_total_err_overflow); | ||
476 | |||
477 | /* HW uses a 16-bit counter */ | ||
478 | l1_data_corrected_err_count_delta += | ||
479 | (is_l1_data_ecc_corrected_total_err_overflow << | ||
480 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s()); | ||
481 | g->ecc.gr.t19x.sm_l1_data_corrected_err_count.counters[tpc] += | ||
482 | l1_data_corrected_err_count_delta; | ||
483 | gk20a_writel(g, | ||
484 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset, | ||
485 | 0); | ||
486 | } | ||
487 | if ((l1_data_uncorrected_err_count_delta > 0) || is_l1_data_ecc_uncorrected_total_err_overflow) { | ||
488 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, | ||
489 | "Uncorrected error (DBE) detected in SM L1 data! err_mask [%08x] is_overf [%d]", | ||
490 | l1_data_ecc_uncorrected_err_status, is_l1_data_ecc_uncorrected_total_err_overflow); | ||
491 | |||
492 | /* HW uses a 16-bit counter */ | ||
493 | l1_data_uncorrected_err_count_delta += | ||
494 | (is_l1_data_ecc_uncorrected_total_err_overflow << | ||
495 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s()); | ||
496 | g->ecc.gr.t19x.sm_l1_data_uncorrected_err_count.counters[tpc] += | ||
497 | l1_data_uncorrected_err_count_delta; | ||
498 | gk20a_writel(g, | ||
499 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset, | ||
500 | 0); | ||
501 | } | ||
502 | |||
503 | gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_r() + offset, | ||
504 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_reset_task_f()); | ||
505 | |||
506 | return 0; | ||
507 | |||
508 | } | ||
509 | |||
510 | static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, | ||
511 | bool *post_event, struct channel_gk20a *fault_ch, | ||
512 | u32 *hww_global_esr) | ||
513 | { | ||
514 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
515 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
516 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
517 | u32 icache_ecc_status, icache_ecc_corrected_err_status = 0; | ||
518 | u32 icache_ecc_uncorrected_err_status = 0; | ||
519 | u32 icache_corrected_err_count_delta = 0; | ||
520 | u32 icache_uncorrected_err_count_delta = 0; | ||
521 | bool is_icache_ecc_corrected_total_err_overflow = 0; | ||
522 | bool is_icache_ecc_uncorrected_total_err_overflow = 0; | ||
523 | |||
524 | /* Check for L0 && L1 icache ECC errors. */ | ||
525 | icache_ecc_status = gk20a_readl(g, | ||
526 | gr_pri_gpc0_tpc0_sm_icache_ecc_status_r() + offset); | ||
527 | icache_ecc_corrected_err_status = icache_ecc_status & | ||
528 | (gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_data_m() | | ||
529 | gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_predecode_m() | | ||
530 | gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_data_m() | | ||
531 | gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_predecode_m()); | ||
532 | icache_ecc_uncorrected_err_status = icache_ecc_status & | ||
533 | (gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_data_m() | | ||
534 | gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_predecode_m() | | ||
535 | gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_data_m() | | ||
536 | gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_predecode_m()); | ||
537 | |||
538 | if ((icache_ecc_corrected_err_status == 0) && (icache_ecc_uncorrected_err_status == 0)) | ||
539 | return 0; | ||
540 | |||
541 | icache_corrected_err_count_delta = | ||
542 | gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_v( | ||
543 | gk20a_readl(g, | ||
544 | gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + | ||
545 | offset)); | ||
546 | icache_uncorrected_err_count_delta = | ||
547 | gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_v( | ||
548 | gk20a_readl(g, | ||
549 | gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + | ||
550 | offset)); | ||
551 | is_icache_ecc_corrected_total_err_overflow = | ||
552 | gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_total_counter_overflow_v(icache_ecc_status); | ||
553 | is_icache_ecc_uncorrected_total_err_overflow = | ||
554 | gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_total_counter_overflow_v(icache_ecc_status); | ||
555 | |||
556 | if ((icache_corrected_err_count_delta > 0) || is_icache_ecc_corrected_total_err_overflow) { | ||
557 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, | ||
558 | "corrected error (SBE) detected in SM L0 && L1 icache! err_mask [%08x] is_overf [%d]", | ||
559 | icache_ecc_corrected_err_status, is_icache_ecc_corrected_total_err_overflow); | ||
560 | |||
561 | /* HW uses a 16-bit counter */ | ||
562 | icache_corrected_err_count_delta += | ||
563 | (is_icache_ecc_corrected_total_err_overflow << | ||
564 | gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s()); | ||
565 | g->ecc.gr.t19x.sm_icache_corrected_err_count.counters[tpc] += | ||
566 | icache_corrected_err_count_delta; | ||
567 | gk20a_writel(g, | ||
568 | gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset, | ||
569 | 0); | ||
570 | } | ||
571 | if ((icache_uncorrected_err_count_delta > 0) || is_icache_ecc_uncorrected_total_err_overflow) { | ||
572 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, | ||
573 | "Uncorrected error (DBE) detected in SM L0 && L1 icache! err_mask [%08x] is_overf [%d]", | ||
574 | icache_ecc_uncorrected_err_status, is_icache_ecc_uncorrected_total_err_overflow); | ||
575 | |||
576 | /* HW uses a 16-bit counter */ | ||
577 | icache_uncorrected_err_count_delta += | ||
578 | (is_icache_ecc_uncorrected_total_err_overflow << | ||
579 | gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s()); | ||
580 | g->ecc.gr.t19x.sm_icache_uncorrected_err_count.counters[tpc] += | ||
581 | icache_uncorrected_err_count_delta; | ||
582 | gk20a_writel(g, | ||
583 | gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset, | ||
584 | 0); | ||
585 | } | ||
586 | |||
587 | gk20a_writel(g, gr_pri_gpc0_tpc0_sm_icache_ecc_status_r() + offset, | ||
588 | gr_pri_gpc0_tpc0_sm_icache_ecc_status_reset_task_f()); | ||
589 | |||
590 | return 0; | ||
591 | |||
592 | } | ||
593 | |||
594 | int gr_gv11b_handle_tpc_sm_ecc_exception(struct gk20a *g, | ||
595 | u32 gpc, u32 tpc, | ||
596 | bool *post_event, struct channel_gk20a *fault_ch, | ||
597 | u32 *hww_global_esr) | ||
598 | { | ||
599 | int ret = 0; | ||
600 | |||
601 | /* Check for L1 tag ECC errors. */ | ||
602 | gr_gv11b_handle_l1_tag_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); | ||
603 | |||
604 | /* Check for LRF ECC errors. */ | ||
605 | gr_gv11b_handle_lrf_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); | ||
606 | |||
607 | /* Check for CBU ECC errors. */ | ||
608 | gr_gv11b_handle_cbu_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); | ||
609 | |||
610 | /* Check for L1 data ECC errors. */ | ||
611 | gr_gv11b_handle_l1_data_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); | ||
612 | |||
613 | /* Check for L0 && L1 icache ECC errors. */ | ||
614 | gr_gv11b_handle_icache_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); | ||
615 | |||
616 | return ret; | ||
617 | } | ||
618 | |||
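/*
 * GCC L1.5 ECC uses the same scheme, but its status registers live at the
 * GPC offset (no TPC component) and the counts accumulate per GPC.
 */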
619 | int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, | ||
620 | bool *post_event, struct channel_gk20a *fault_ch, | ||
621 | u32 *hww_global_esr) | ||
622 | { | ||
623 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
624 | u32 offset = gpc_stride * gpc; | ||
625 | u32 gcc_l15_ecc_status, gcc_l15_ecc_corrected_err_status = 0; | ||
626 | u32 gcc_l15_ecc_uncorrected_err_status = 0; | ||
627 | u32 gcc_l15_corrected_err_count_delta = 0; | ||
628 | u32 gcc_l15_uncorrected_err_count_delta = 0; | ||
629 | bool is_gcc_l15_ecc_corrected_total_err_overflow = 0; | ||
630 | bool is_gcc_l15_ecc_uncorrected_total_err_overflow = 0; | ||
631 | |||
632 | /* Check for gcc l15 ECC errors. */ | ||
633 | gcc_l15_ecc_status = gk20a_readl(g, | ||
634 | gr_pri_gpc0_gcc_l15_ecc_status_r() + offset); | ||
635 | gcc_l15_ecc_corrected_err_status = gcc_l15_ecc_status & | ||
636 | (gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank0_m() | | ||
637 | gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank1_m()); | ||
638 | gcc_l15_ecc_uncorrected_err_status = gcc_l15_ecc_status & | ||
639 | (gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank0_m() | | ||
640 | gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank1_m()); | ||
641 | |||
642 | if ((gcc_l15_ecc_corrected_err_status == 0) && (gcc_l15_ecc_uncorrected_err_status == 0)) | ||
643 | return 0; | ||
644 | |||
645 | gcc_l15_corrected_err_count_delta = | ||
646 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_v( | ||
647 | gk20a_readl(g, | ||
648 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + | ||
649 | offset)); | ||
650 | gcc_l15_uncorrected_err_count_delta = | ||
651 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_v( | ||
652 | gk20a_readl(g, | ||
653 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + | ||
654 | offset)); | ||
655 | is_gcc_l15_ecc_corrected_total_err_overflow = | ||
656 | gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_total_counter_overflow_v(gcc_l15_ecc_status); | ||
657 | is_gcc_l15_ecc_uncorrected_total_err_overflow = | ||
658 | gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_total_counter_overflow_v(gcc_l15_ecc_status); | ||
659 | |||
660 | if ((gcc_l15_corrected_err_count_delta > 0) || is_gcc_l15_ecc_corrected_total_err_overflow) { | ||
661 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, | ||
662 | "corrected error (SBE) detected in GCC L1.5! err_mask [%08x] is_overf [%d]", | ||
663 | gcc_l15_ecc_corrected_err_status, is_gcc_l15_ecc_corrected_total_err_overflow); | ||
664 | |||
665 | /* HW uses a 16-bit counter */ | ||
666 | gcc_l15_corrected_err_count_delta += | ||
667 | (is_gcc_l15_ecc_corrected_total_err_overflow << | ||
668 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s()); | ||
669 | g->ecc.gr.t19x.gcc_l15_corrected_err_count.counters[gpc] += | ||
670 | gcc_l15_corrected_err_count_delta; | ||
671 | gk20a_writel(g, | ||
672 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset, | ||
673 | 0); | ||
674 | } | ||
675 | if ((gcc_l15_uncorrected_err_count_delta > 0) || is_gcc_l15_ecc_uncorrected_total_err_overflow) { | ||
676 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, | ||
677 | "Uncorrected error (DBE) detected in GCC L1.5! err_mask [%08x] is_overf [%d]", | ||
678 | gcc_l15_ecc_uncorrected_err_status, is_gcc_l15_ecc_uncorrected_total_err_overflow); | ||
679 | |||
680 | /* HW uses a 16-bit counter */ | ||
681 | gcc_l15_uncorrected_err_count_delta += | ||
682 | (is_gcc_l15_ecc_uncorrected_total_err_overflow << | ||
683 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s()); | ||
684 | g->ecc.gr.t19x.gcc_l15_uncorrected_err_count.counters[gpc] += | ||
685 | gcc_l15_uncorrected_err_count_delta; | ||
686 | gk20a_writel(g, | ||
687 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset, | ||
688 | 0); | ||
689 | } | ||
690 | |||
691 | gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_status_r() + offset, | ||
692 | gr_pri_gpc0_gcc_l15_ecc_status_reset_task_f()); | ||
693 | |||
694 | return 0; | ||
695 | } | ||
696 | |||
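/*
 * The GPCMMU L1TLB and GPCCS falcon handlers below also follow the
 * read/accumulate/clear scheme, but are gated on the corrected/uncorrected
 * ECC bits of the unit's HWW ESR and log which RAM (SA/FA data, IMEM/DMEM)
 * reported the error.
 */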
697 | static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc, | ||
698 | u32 exception) | ||
699 | { | ||
700 | int ret = 0; | ||
701 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
702 | u32 offset = gpc_stride * gpc; | ||
703 | u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; | ||
704 | u32 corrected_delta, uncorrected_delta; | ||
705 | u32 corrected_overflow, uncorrected_overflow; | ||
706 | int hww_esr; | ||
707 | |||
708 | hww_esr = gk20a_readl(g, gr_gpc0_mmu_gpcmmu_global_esr_r() + offset); | ||
709 | |||
710 | if (!(hww_esr & (gr_gpc0_mmu_gpcmmu_global_esr_ecc_corrected_m() | | ||
711 | gr_gpc0_mmu_gpcmmu_global_esr_ecc_uncorrected_m()))) | ||
712 | return ret; | ||
713 | |||
714 | ecc_status = gk20a_readl(g, | ||
715 | gr_gpc0_mmu_l1tlb_ecc_status_r() + offset); | ||
716 | ecc_addr = gk20a_readl(g, | ||
717 | gr_gpc0_mmu_l1tlb_ecc_address_r() + offset); | ||
718 | corrected_cnt = gk20a_readl(g, | ||
719 | gr_gpc0_mmu_l1tlb_ecc_corrected_err_count_r() + offset); | ||
720 | uncorrected_cnt = gk20a_readl(g, | ||
721 | gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_r() + offset); | ||
722 | |||
723 | corrected_delta = gr_gpc0_mmu_l1tlb_ecc_corrected_err_count_total_v( | ||
724 | corrected_cnt); | ||
725 | uncorrected_delta = gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_v( | ||
726 | uncorrected_cnt); | ||
727 | corrected_overflow = ecc_status & | ||
728 | gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_total_counter_overflow_m(); | ||
729 | |||
730 | uncorrected_overflow = ecc_status & | ||
731 | gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_total_counter_overflow_m(); | ||
732 | |||
733 | |||
734 | /* clear the interrupt */ | ||
735 | if ((corrected_delta > 0) || corrected_overflow) | ||
736 | gk20a_writel(g, | ||
737 | gr_gpc0_mmu_l1tlb_ecc_corrected_err_count_r() + | ||
738 | offset, 0); | ||
739 | if ((uncorrected_delta > 0) || uncorrected_overflow) | ||
740 | gk20a_writel(g, | ||
741 | gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_r() + | ||
742 | offset, 0); | ||
743 | |||
744 | gk20a_writel(g, gr_gpc0_mmu_l1tlb_ecc_status_r() + offset, | ||
745 | gr_gpc0_mmu_l1tlb_ecc_status_reset_task_f()); | ||
746 | |||
747 | /* Handle overflow */ | ||
748 | if (corrected_overflow) | ||
749 | corrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_corrected_err_count_total_s()); | ||
750 | if (uncorrected_overflow) | ||
751 | uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s()); | ||
752 | |||
753 | |||
754 | g->ecc.gr.t19x.mmu_l1tlb_corrected_err_count.counters[gpc] += | ||
755 | corrected_delta; | ||
756 | g->ecc.gr.t19x.mmu_l1tlb_uncorrected_err_count.counters[gpc] += | ||
757 | uncorrected_delta; | ||
758 | nvgpu_log(g, gpu_dbg_intr, | ||
759 | "mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); | ||
760 | |||
761 | if (ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_sa_data_m()) | ||
762 | nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error"); | ||
763 | if (ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_sa_data_m()) | ||
764 | nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error"); | ||
765 | if (ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_fa_data_m()) | ||
766 | nvgpu_log(g, gpu_dbg_intr, "corrected ecc fa data error"); | ||
767 | if (ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_fa_data_m()) | ||
768 | nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc fa data error"); | ||
769 | if (corrected_overflow || uncorrected_overflow) | ||
770 | nvgpu_info(g, "mmu l1tlb ecc counter overflow!"); | ||
771 | |||
772 | nvgpu_log(g, gpu_dbg_intr, | ||
773 | "ecc error address: 0x%x", ecc_addr); | ||
774 | nvgpu_log(g, gpu_dbg_intr, | ||
775 | "ecc error count corrected: %d, uncorrected %d", | ||
776 | g->ecc.gr.t19x.mmu_l1tlb_corrected_err_count.counters[gpc], | ||
777 | g->ecc.gr.t19x.mmu_l1tlb_uncorrected_err_count.counters[gpc]); | ||
778 | |||
779 | return ret; | ||
780 | } | ||
781 | |||
782 | static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, | ||
783 | u32 exception) | ||
784 | { | ||
785 | int ret = 0; | ||
786 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
787 | u32 offset = gpc_stride * gpc; | ||
788 | u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; | ||
789 | u32 corrected_delta, uncorrected_delta; | ||
790 | u32 corrected_overflow, uncorrected_overflow; | ||
791 | int hww_esr; | ||
792 | |||
793 | hww_esr = gk20a_readl(g, gr_gpc0_gpccs_hww_esr_r() + offset); | ||
794 | |||
795 | if (!(hww_esr & (gr_gpc0_gpccs_hww_esr_ecc_uncorrected_m() | | ||
796 | gr_gpc0_gpccs_hww_esr_ecc_corrected_m()))) | ||
797 | return ret; | ||
798 | |||
799 | ecc_status = gk20a_readl(g, | ||
800 | gr_gpc0_gpccs_falcon_ecc_status_r() + offset); | ||
801 | ecc_addr = gk20a_readl(g, | ||
802 | gr_gpc0_gpccs_falcon_ecc_address_r() + offset); | ||
803 | corrected_cnt = gk20a_readl(g, | ||
804 | gr_gpc0_gpccs_falcon_ecc_corrected_err_count_r() + offset); | ||
805 | uncorrected_cnt = gk20a_readl(g, | ||
806 | gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_r() + offset); | ||
807 | |||
808 | corrected_delta = gr_gpc0_gpccs_falcon_ecc_corrected_err_count_total_v( | ||
809 | corrected_cnt); | ||
810 | uncorrected_delta = gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_total_v( | ||
811 | uncorrected_cnt); | ||
812 | corrected_overflow = ecc_status & | ||
813 | gr_gpc0_gpccs_falcon_ecc_status_corrected_err_total_counter_overflow_m(); | ||
814 | |||
815 | uncorrected_overflow = ecc_status & | ||
816 | gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_total_counter_overflow_m(); | ||
817 | |||
818 | |||
819 | /* clear the interrupt */ | ||
820 | if ((corrected_delta > 0) || corrected_overflow) | ||
821 | gk20a_writel(g, | ||
822 | gr_gpc0_gpccs_falcon_ecc_corrected_err_count_r() + | ||
823 | offset, 0); | ||
824 | if ((uncorrected_delta > 0) || uncorrected_overflow) | ||
825 | gk20a_writel(g, | ||
826 | gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_r() + | ||
827 | offset, 0); | ||
828 | |||
829 | gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset, | ||
830 | gr_gpc0_gpccs_falcon_ecc_status_reset_task_f()); | ||
831 | |||
832 | g->ecc.gr.t19x.gpccs_corrected_err_count.counters[gpc] += | ||
833 | corrected_delta; | ||
834 | g->ecc.gr.t19x.gpccs_uncorrected_err_count.counters[gpc] += | ||
835 | uncorrected_delta; | ||
836 | nvgpu_log(g, gpu_dbg_intr, | ||
837 | "gpccs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); | ||
838 | |||
839 | if (ecc_status & gr_gpc0_gpccs_falcon_ecc_status_corrected_err_imem_m()) | ||
840 | nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); | ||
841 | if (ecc_status & | ||
842 | gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_imem_m()) | ||
843 | nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected"); | ||
844 | if (ecc_status & | ||
845 | gr_gpc0_gpccs_falcon_ecc_status_corrected_err_dmem_m()) | ||
846 | nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected"); | ||
847 | if (ecc_status & | ||
848 | gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m()) | ||
849 | nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected"); | ||
850 | if (corrected_overflow || uncorrected_overflow) | ||
851 | nvgpu_info(g, "gpccs ecc counter overflow!"); | ||
852 | |||
853 | nvgpu_log(g, gpu_dbg_intr, | ||
854 | "ecc error row address: 0x%x", | ||
855 | gr_gpc0_gpccs_falcon_ecc_address_row_address_v(ecc_addr)); | ||
856 | |||
857 | nvgpu_log(g, gpu_dbg_intr, | ||
858 | "ecc error count corrected: %d, uncorrected %d", | ||
859 | g->ecc.gr.t19x.gpccs_corrected_err_count.counters[gpc], | ||
860 | g->ecc.gr.t19x.gpccs_uncorrected_err_count.counters[gpc]); | ||
861 | |||
862 | return ret; | ||
863 | } | ||
864 | |||
865 | int gr_gv11b_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc, | ||
866 | u32 gpc_exception) | ||
867 | { | ||
868 | if (gpc_exception & gr_gpc0_gpccs_gpc_exception_gpcmmu_m()) | ||
869 | return gr_gv11b_handle_gpcmmu_ecc_exception(g, gpc, | ||
870 | gpc_exception); | ||
871 | return 0; | ||
872 | } | ||
873 | |||
874 | int gr_gv11b_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc, | ||
875 | u32 gpc_exception) | ||
876 | { | ||
877 | if (gpc_exception & gr_gpc0_gpccs_gpc_exception_gpccs_m()) | ||
878 | return gr_gv11b_handle_gpccs_ecc_exception(g, gpc, | ||
879 | gpc_exception); | ||
880 | |||
881 | return 0; | ||
882 | } | ||
883 | |||
884 | void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) | ||
885 | { | ||
886 | struct gr_gk20a *gr = &g->gr; | ||
887 | u32 tpc_mask; | ||
888 | |||
889 | gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), | ||
890 | gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f() | | ||
891 | gr_gpcs_tpcs_tpccs_tpc_exception_en_mpc_enabled_f()); | ||
892 | |||
893 | tpc_mask = | ||
894 | gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); | ||
895 | |||
896 | gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), | ||
897 | (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) | | ||
898 | gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1) | | ||
899 | gr_gpcs_gpccs_gpc_exception_en_gpcmmu_f(1))); | ||
900 | } | ||
901 | |||
902 | int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, | ||
903 | bool *post_event) | ||
904 | { | ||
905 | return 0; | ||
906 | } | ||
907 | |||
908 | int gr_gv11b_zbc_s_query_table(struct gk20a *g, struct gr_gk20a *gr, | ||
909 | struct zbc_query_params *query_params) | ||
910 | { | ||
911 | u32 index = query_params->index_size; | ||
912 | |||
913 | if (index >= GK20A_ZBC_TABLE_SIZE) { | ||
914 | nvgpu_err(g, "invalid zbc stencil table index"); | ||
915 | return -EINVAL; | ||
916 | } | ||
917 | query_params->depth = gr->zbc_s_tbl[index].stencil; | ||
918 | query_params->format = gr->zbc_s_tbl[index].format; | ||
919 | query_params->ref_cnt = gr->zbc_s_tbl[index].ref_cnt; | ||
920 | |||
921 | return 0; | ||
922 | } | ||
923 | |||
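/*
 * Stencil ZBC entries are ref-counted: an add first searches the existing
 * table for a matching (stencil value, format) pair and bumps its ref count;
 * only when no match exists and the table is not full is a new entry
 * programmed at max_used_s_index via g->ops.gr.add_zbc_s.
 */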
924 | bool gr_gv11b_add_zbc_type_s(struct gk20a *g, struct gr_gk20a *gr, | ||
925 | struct zbc_entry *zbc_val, int *ret_val) | ||
926 | { | ||
927 | struct zbc_s_table *s_tbl; | ||
928 | u32 i; | ||
929 | bool added = false; | ||
930 | |||
931 | *ret_val = -ENOMEM; | ||
932 | |||
933 | /* search existing tables */ | ||
934 | for (i = 0; i < gr->max_used_s_index; i++) { | ||
935 | |||
936 | s_tbl = &gr->zbc_s_tbl[i]; | ||
937 | |||
938 | if (s_tbl->ref_cnt && | ||
939 | s_tbl->stencil == zbc_val->depth && | ||
940 | s_tbl->format == zbc_val->format) { | ||
941 | added = true; | ||
942 | s_tbl->ref_cnt++; | ||
943 | *ret_val = 0; | ||
944 | break; | ||
945 | } | ||
946 | } | ||
947 | /* add new table */ | ||
948 | if (!added && | ||
949 | gr->max_used_s_index < GK20A_ZBC_TABLE_SIZE) { | ||
950 | |||
951 | s_tbl = &gr->zbc_s_tbl[gr->max_used_s_index]; | ||
952 | WARN_ON(s_tbl->ref_cnt != 0); | ||
953 | |||
954 | *ret_val = g->ops.gr.add_zbc_s(g, gr, | ||
955 | zbc_val, gr->max_used_s_index); | ||
956 | |||
957 | if (!(*ret_val)) | ||
958 | gr->max_used_s_index++; | ||
959 | } | ||
960 | return added; | ||
961 | } | ||
962 | |||
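/*
 * Programming a stencil entry touches the per-index clear-value register and
 * a packed format word holding four 7-bit format fields per register, hence
 * the (index & ~3) addressing and the (index % 4) * 7 shift below.
 */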
963 | int gr_gv11b_add_zbc_stencil(struct gk20a *g, struct gr_gk20a *gr, | ||
964 | struct zbc_entry *stencil_val, u32 index) | ||
965 | { | ||
966 | u32 zbc_s; | ||
967 | |||
968 | /* update l2 table */ | ||
969 | g->ops.ltc.set_zbc_s_entry(g, stencil_val, index); | ||
970 | |||
971 | /* update local copy */ | ||
972 | gr->zbc_s_tbl[index].stencil = stencil_val->depth; | ||
973 | gr->zbc_s_tbl[index].format = stencil_val->format; | ||
974 | gr->zbc_s_tbl[index].ref_cnt++; | ||
975 | |||
976 | gk20a_writel(g, gr_gpcs_swdx_dss_zbc_s_r(index), stencil_val->depth); | ||
977 | zbc_s = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_s_01_to_04_format_r() + | ||
978 | (index & ~3)); | ||
979 | zbc_s &= ~(0x7f << (index % 4) * 7); | ||
980 | zbc_s |= stencil_val->format << (index % 4) * 7; | ||
981 | gk20a_writel(g, gr_gpcs_swdx_dss_zbc_s_01_to_04_format_r() + | ||
982 | (index & ~3), zbc_s); | ||
983 | |||
984 | return 0; | ||
985 | } | ||
986 | |||
987 | int gr_gv11b_load_stencil_default_tbl(struct gk20a *g, | ||
988 | struct gr_gk20a *gr) | ||
989 | { | ||
990 | struct zbc_entry zbc_val; | ||
991 | u32 err; | ||
992 | |||
993 | /* load default stencil table */ | ||
994 | zbc_val.type = GV11B_ZBC_TYPE_STENCIL; | ||
995 | |||
996 | zbc_val.depth = 0x0; | ||
997 | zbc_val.format = ZBC_STENCIL_CLEAR_FMT_U8; | ||
998 | err = gr_gk20a_add_zbc(g, gr, &zbc_val); | ||
999 | |||
1000 | zbc_val.depth = 0x1; | ||
1001 | zbc_val.format = ZBC_STENCIL_CLEAR_FMT_U8; | ||
1002 | err |= gr_gk20a_add_zbc(g, gr, &zbc_val); | ||
1003 | |||
1004 | zbc_val.depth = 0xff; | ||
1005 | zbc_val.format = ZBC_STENCIL_CLEAR_FMT_U8; | ||
1006 | err |= gr_gk20a_add_zbc(g, gr, &zbc_val); | ||
1007 | |||
1008 | if (!err) { | ||
1009 | gr->max_default_s_index = 3; | ||
1010 | } else { | ||
1011 | nvgpu_err(g, "fail to load default zbc stencil table"); | ||
1012 | return err; | ||
1013 | } | ||
1014 | |||
1015 | return 0; | ||
1016 | } | ||
1017 | |||
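/*
 * Replays the driver's saved stencil ZBC entries back into the hardware
 * table via add_zbc_s, preserving their table indices.
 */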
1018 | int gr_gv11b_load_stencil_tbl(struct gk20a *g, struct gr_gk20a *gr) | ||
1019 | { | ||
1020 | int ret; | ||
1021 | u32 i; | ||
1022 | |||
1023 | for (i = 0; i < gr->max_used_s_index; i++) { | ||
1024 | struct zbc_s_table *s_tbl = &gr->zbc_s_tbl[i]; | ||
1025 | struct zbc_entry zbc_val; | ||
1026 | |||
1027 | zbc_val.type = GV11B_ZBC_TYPE_STENCIL; | ||
1028 | zbc_val.depth = s_tbl->stencil; | ||
1029 | zbc_val.format = s_tbl->format; | ||
1030 | |||
1031 | ret = g->ops.gr.add_zbc_s(g, gr, &zbc_val, i); | ||
1032 | if (ret) | ||
1033 | return ret; | ||
1034 | } | ||
1035 | return 0; | ||
1036 | } | ||
1037 | |||
1038 | u32 gr_gv11b_pagepool_default_size(struct gk20a *g) | ||
1039 | { | ||
1040 | return gr_scc_pagepool_total_pages_hwmax_value_v(); | ||
1041 | } | ||
1042 | |||
1043 | int gr_gv11b_calc_global_ctx_buffer_size(struct gk20a *g) | ||
1044 | { | ||
1045 | struct gr_gk20a *gr = &g->gr; | ||
1046 | int size; | ||
1047 | |||
1048 | gr->attrib_cb_size = gr->attrib_cb_default_size; | ||
1049 | gr->alpha_cb_size = gr->alpha_cb_default_size; | ||
1050 | |||
1051 | gr->attrib_cb_size = min(gr->attrib_cb_size, | ||
1052 | gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~0) / g->gr.tpc_count); | ||
1053 | gr->alpha_cb_size = min(gr->alpha_cb_size, | ||
1054 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~0) / g->gr.tpc_count); | ||
1055 | |||
1056 | size = gr->attrib_cb_size * | ||
1057 | gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * | ||
1058 | gr->max_tpc_count; | ||
1059 | |||
1060 | size += gr->alpha_cb_size * | ||
1061 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * | ||
1062 | gr->max_tpc_count; | ||
1063 | |||
1064 | size = ALIGN(size, 128); | ||
1065 | |||
1066 | return size; | ||
1067 | } | ||
1068 | |||
1069 | static void gr_gv11b_set_go_idle_timeout(struct gk20a *g, u32 data) | ||
1070 | { | ||
1071 | gk20a_writel(g, gr_fe_go_idle_timeout_r(), data); | ||
1072 | } | ||
1073 | |||
1074 | static void gr_gv11b_set_coalesce_buffer_size(struct gk20a *g, u32 data) | ||
1075 | { | ||
1076 | u32 val; | ||
1077 | |||
1078 | gk20a_dbg_fn(""); | ||
1079 | |||
1080 | val = gk20a_readl(g, gr_gpcs_tc_debug0_r()); | ||
1081 | val = set_field(val, gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(), | ||
1082 | gr_gpcs_tc_debug0_limit_coalesce_buffer_size_f(data)); | ||
1083 | gk20a_writel(g, gr_gpcs_tc_debug0_r(), val); | ||
1084 | |||
1085 | gk20a_dbg_fn("done"); | ||
1086 | } | ||
1087 | |||
1088 | static void gr_gv11b_set_tex_in_dbg(struct gk20a *g, u32 data) | ||
1089 | { | ||
1090 | u32 val; | ||
1091 | bool flag; | ||
1092 | |||
1093 | gk20a_dbg_fn(""); | ||
1094 | |||
1095 | val = gk20a_readl(g, gr_gpcs_tpcs_tex_in_dbg_r()); | ||
1096 | flag = (data & NVC397_SET_TEX_IN_DBG_TSL1_RVCH_INVALIDATE) ? 1 : 0; | ||
1097 | val = set_field(val, gr_gpcs_tpcs_tex_in_dbg_tsl1_rvch_invalidate_m(), | ||
1098 | gr_gpcs_tpcs_tex_in_dbg_tsl1_rvch_invalidate_f(flag)); | ||
1099 | gk20a_writel(g, gr_gpcs_tpcs_tex_in_dbg_r(), val); | ||
1100 | |||
1101 | val = gk20a_readl(g, gr_gpcs_tpcs_sm_l1tag_ctrl_r()); | ||
1102 | flag = (data & | ||
1103 | NVC397_SET_TEX_IN_DBG_SM_L1TAG_CTRL_CACHE_SURFACE_LD) ? 1 : 0; | ||
1104 | val = set_field(val, gr_gpcs_tpcs_sm_l1tag_ctrl_cache_surface_ld_m(), | ||
1105 | gr_gpcs_tpcs_sm_l1tag_ctrl_cache_surface_ld_f(flag)); | ||
1106 | flag = (data & | ||
1107 | NVC397_SET_TEX_IN_DBG_SM_L1TAG_CTRL_CACHE_SURFACE_ST) ? 1 : 0; | ||
1108 | val = set_field(val, gr_gpcs_tpcs_sm_l1tag_ctrl_cache_surface_st_m(), | ||
1109 | gr_gpcs_tpcs_sm_l1tag_ctrl_cache_surface_st_f(flag)); | ||
1110 | gk20a_writel(g, gr_gpcs_tpcs_sm_l1tag_ctrl_r(), val); | ||
1111 | } | ||
1112 | |||
1113 | static void gr_gv11b_set_skedcheck(struct gk20a *g, u32 data) | ||
1114 | { | ||
1115 | u32 reg_val; | ||
1116 | |||
1117 | reg_val = gk20a_readl(g, gr_sked_hww_esr_en_r()); | ||
1118 | |||
1119 | if ((data & NVC397_SET_SKEDCHECK_18_MASK) == | ||
1120 | NVC397_SET_SKEDCHECK_18_DISABLE) { | ||
1121 | reg_val = set_field(reg_val, | ||
1122 | gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_m(), | ||
1123 | gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_disabled_f() | ||
1124 | ); | ||
1125 | } else if ((data & NVC397_SET_SKEDCHECK_18_MASK) == | ||
1126 | NVC397_SET_SKEDCHECK_18_ENABLE) { | ||
1127 | reg_val = set_field(reg_val, | ||
1128 | gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_m(), | ||
1129 | gr_sked_hww_esr_en_skedcheck18_l1_config_too_small_enabled_f() | ||
1130 | ); | ||
1131 | } | ||
1132 | nvgpu_log_info(g, "sked_hww_esr_en = 0x%x", reg_val); | ||
1133 | gk20a_writel(g, gr_sked_hww_esr_en_r(), reg_val); | ||
1134 | |||
1135 | } | ||
1136 | |||
1137 | static void gv11b_gr_set_shader_exceptions(struct gk20a *g, u32 data) | ||
1138 | { | ||
1139 | gk20a_dbg_fn(""); | ||
1140 | |||
1141 | if (data == NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE) { | ||
1142 | gk20a_writel(g, gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r(), | ||
1143 | 0); | ||
1144 | gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r(), | ||
1145 | 0); | ||
1146 | } else { | ||
1147 | g->ops.gr.set_hww_esr_report_mask(g); | ||
1148 | } | ||
1149 | } | ||
1150 | |||
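/*
 * SW methods are dispatched on the byte offset (offset << 2 converts the
 * method's word offset to a byte address) and routed to the matching
 * per-chip setter; unhandled methods return -EINVAL.
 */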
1151 | int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, | ||
1152 | u32 class_num, u32 offset, u32 data) | ||
1153 | { | ||
1154 | gk20a_dbg_fn(""); | ||
1155 | |||
1156 | if (class_num == VOLTA_COMPUTE_A) { | ||
1157 | switch (offset << 2) { | ||
1158 | case NVC0C0_SET_SHADER_EXCEPTIONS: | ||
1159 | gv11b_gr_set_shader_exceptions(g, data); | ||
1160 | break; | ||
1161 | case NVC3C0_SET_SKEDCHECK: | ||
1162 | gr_gv11b_set_skedcheck(g, data); | ||
1163 | break; | ||
1164 | default: | ||
1165 | goto fail; | ||
1166 | } | ||
1167 | } | ||
1168 | |||
1169 | if (class_num == VOLTA_A) { | ||
1170 | switch (offset << 2) { | ||
1171 | case NVC397_SET_SHADER_EXCEPTIONS: | ||
1172 | gv11b_gr_set_shader_exceptions(g, data); | ||
1173 | break; | ||
1174 | case NVC397_SET_CIRCULAR_BUFFER_SIZE: | ||
1175 | g->ops.gr.set_circular_buffer_size(g, data); | ||
1176 | break; | ||
1177 | case NVC397_SET_ALPHA_CIRCULAR_BUFFER_SIZE: | ||
1178 | g->ops.gr.set_alpha_circular_buffer_size(g, data); | ||
1179 | break; | ||
1180 | case NVC397_SET_GO_IDLE_TIMEOUT: | ||
1181 | gr_gv11b_set_go_idle_timeout(g, data); | ||
1182 | break; | ||
1183 | case NVC097_SET_COALESCE_BUFFER_SIZE: | ||
1184 | gr_gv11b_set_coalesce_buffer_size(g, data); | ||
1185 | break; | ||
1186 | case NVC397_SET_TEX_IN_DBG: | ||
1187 | gr_gv11b_set_tex_in_dbg(g, data); | ||
1188 | break; | ||
1189 | case NVC397_SET_SKEDCHECK: | ||
1190 | gr_gv11b_set_skedcheck(g, data); | ||
1191 | break; | ||
1192 | case NVC397_SET_BES_CROP_DEBUG3: | ||
1193 | g->ops.gr.set_bes_crop_debug3(g, data); | ||
1194 | break; | ||
1195 | default: | ||
1196 | goto fail; | ||
1197 | } | ||
1198 | } | ||
1199 | return 0; | ||
1200 | |||
1201 | fail: | ||
1202 | return -EINVAL; | ||
1203 | } | ||
1204 | |||
1205 | void gr_gv11b_bundle_cb_defaults(struct gk20a *g) | ||
1206 | { | ||
1207 | struct gr_gk20a *gr = &g->gr; | ||
1208 | |||
1209 | gr->bundle_cb_default_size = | ||
1210 | gr_scc_bundle_cb_size_div_256b__prod_v(); | ||
1211 | gr->min_gpm_fifo_depth = | ||
1212 | gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); | ||
1213 | gr->bundle_cb_token_limit = | ||
1214 | gr_pd_ab_dist_cfg2_token_limit_init_v(); | ||
1215 | } | ||
1216 | |||
1217 | void gr_gv11b_cb_size_default(struct gk20a *g) | ||
1218 | { | ||
1219 | struct gr_gk20a *gr = &g->gr; | ||
1220 | |||
1221 | if (!gr->attrib_cb_default_size) | ||
1222 | gr->attrib_cb_default_size = | ||
1223 | gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); | ||
1224 | gr->alpha_cb_default_size = | ||
1225 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); | ||
1226 | } | ||
1227 | |||
1228 | void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | ||
1229 | { | ||
1230 | struct gr_gk20a *gr = &g->gr; | ||
1231 | u32 gpc_index, ppc_index, stride, val; | ||
1232 | u32 pd_ab_max_output; | ||
1233 | u32 alpha_cb_size = data * 4; | ||
1234 | |||
1235 | gk20a_dbg_fn(""); | ||
1236 | |||
1237 | if (alpha_cb_size > gr->alpha_cb_size) | ||
1238 | alpha_cb_size = gr->alpha_cb_size; | ||
1239 | |||
1240 | gk20a_writel(g, gr_ds_tga_constraintlogic_alpha_r(), | ||
1241 | (gk20a_readl(g, gr_ds_tga_constraintlogic_alpha_r()) & | ||
1242 | ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) | | ||
1243 | gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size)); | ||
1244 | |||
1245 | pd_ab_max_output = alpha_cb_size * | ||
1246 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() / | ||
1247 | gr_pd_ab_dist_cfg1_max_output_granularity_v(); | ||
1248 | |||
1249 | gk20a_writel(g, gr_pd_ab_dist_cfg1_r(), | ||
1250 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | | ||
1251 | gr_pd_ab_dist_cfg1_max_batches_init_f()); | ||
1252 | |||
1253 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
1254 | stride = proj_gpc_stride_v() * gpc_index; | ||
1255 | |||
1256 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
1257 | ppc_index++) { | ||
1258 | |||
1259 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + | ||
1260 | stride + | ||
1261 | proj_ppc_in_gpc_stride_v() * ppc_index); | ||
1262 | |||
1263 | val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), | ||
1264 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * | ||
1265 | gr->pes_tpc_count[ppc_index][gpc_index])); | ||
1266 | |||
1267 | gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + | ||
1268 | stride + | ||
1269 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | ||
1270 | } | ||
1271 | } | ||
1272 | } | ||
1273 | |||
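/*
 * If the current beta CB size differs from the steady-state size (GfxP
 * buffering in effect), the per-PPC beta CB registers get the steady-state
 * size plus the GfxP delta; the constraint logic, steady-state and swdx
 * registers always get the steady-state value.
 */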
1274 | void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data) | ||
1275 | { | ||
1276 | struct gr_gk20a *gr = &g->gr; | ||
1277 | u32 gpc_index, ppc_index, stride, val; | ||
1278 | u32 cb_size_steady = data * 4, cb_size; | ||
1279 | |||
1280 | gk20a_dbg_fn(""); | ||
1281 | |||
1282 | if (cb_size_steady > gr->attrib_cb_size) | ||
1283 | cb_size_steady = gr->attrib_cb_size; | ||
1284 | if (gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) != | ||
1285 | gk20a_readl(g, | ||
1286 | gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r())) { | ||
1287 | cb_size = cb_size_steady + | ||
1288 | (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - | ||
1289 | gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); | ||
1290 | } else { | ||
1291 | cb_size = cb_size_steady; | ||
1292 | } | ||
1293 | |||
1294 | gk20a_writel(g, gr_ds_tga_constraintlogic_beta_r(), | ||
1295 | (gk20a_readl(g, gr_ds_tga_constraintlogic_beta_r()) & | ||
1296 | ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) | | ||
1297 | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady)); | ||
1298 | |||
1299 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
1300 | stride = proj_gpc_stride_v() * gpc_index; | ||
1301 | |||
1302 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
1303 | ppc_index++) { | ||
1304 | |||
1305 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + | ||
1306 | stride + | ||
1307 | proj_ppc_in_gpc_stride_v() * ppc_index); | ||
1308 | |||
1309 | val = set_field(val, | ||
1310 | gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), | ||
1311 | gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size * | ||
1312 | gr->pes_tpc_count[ppc_index][gpc_index])); | ||
1313 | |||
1314 | gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + | ||
1315 | stride + | ||
1316 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | ||
1317 | |||
1318 | gk20a_writel(g, proj_ppc_in_gpc_stride_v() * ppc_index + | ||
1319 | gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + | ||
1320 | stride, | ||
1321 | gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_f( | ||
1322 | cb_size_steady)); | ||
1323 | |||
1324 | val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r( | ||
1325 | ppc_index + gpc_index)); | ||
1326 | |||
1327 | val = set_field(val, | ||
1328 | gr_gpcs_swdx_tc_beta_cb_size_v_m(), | ||
1329 | gr_gpcs_swdx_tc_beta_cb_size_v_f( | ||
1330 | cb_size_steady * | ||
1331 | gr->gpc_ppc_count[gpc_index])); | ||
1332 | |||
1333 | gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r( | ||
1334 | ppc_index + gpc_index), val); | ||
1335 | } | ||
1336 | } | ||
1337 | } | ||
1338 | |||
1339 | int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size, | ||
1340 | struct nvgpu_mem *mem) | ||
1341 | { | ||
1342 | int err; | ||
1343 | |||
1344 | gk20a_dbg_fn(""); | ||
1345 | |||
1346 | err = nvgpu_dma_alloc_sys(vm->mm->g, size, mem); | ||
1347 | if (err) | ||
1348 | return err; | ||
1349 | |||
1350 | mem->gpu_va = nvgpu_gmmu_map(vm, | ||
1351 | mem, | ||
1352 | size, | ||
1353 | NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE, | ||
1354 | gk20a_mem_flag_none, | ||
1355 | false, | ||
1356 | mem->aperture); | ||
1357 | |||
1358 | if (!mem->gpu_va) { | ||
1359 | err = -ENOMEM; | ||
1360 | goto fail_free; | ||
1361 | } | ||
1362 | |||
1363 | return 0; | ||
1364 | |||
1365 | fail_free: | ||
1366 | nvgpu_dma_free(vm->mm->g, mem); | ||
1367 | return err; | ||
1368 | } | ||
1369 | |||
1370 | static void gr_gv11b_dump_gr_per_sm_regs(struct gk20a *g, | ||
1371 | struct gk20a_debug_output *o, | ||
1372 | u32 gpc, u32 tpc, u32 sm, u32 offset) | ||
1373 | { | ||
1374 | |||
1375 | gk20a_debug_output(o, | ||
1376 | "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_WARP_ESR: 0x%x\n", | ||
1377 | gpc, tpc, sm, gk20a_readl(g, | ||
1378 | gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset)); | ||
1379 | |||
1380 | gk20a_debug_output(o, | ||
1381 | "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_WARP_ESR_REPORT_MASK: 0x%x\n", | ||
1382 | gpc, tpc, sm, gk20a_readl(g, | ||
1383 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset)); | ||
1384 | |||
1385 | gk20a_debug_output(o, | ||
1386 | "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_GLOBAL_ESR: 0x%x\n", | ||
1387 | gpc, tpc, sm, gk20a_readl(g, | ||
1388 | gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset)); | ||
1389 | |||
1390 | gk20a_debug_output(o, | ||
1391 | "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_GLOBAL_ESR_REPORT_MASK: 0x%x\n", | ||
1392 | gpc, tpc, sm, gk20a_readl(g, | ||
1393 | gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r() + offset)); | ||
1394 | |||
1395 | gk20a_debug_output(o, | ||
1396 | "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_DBGR_CONTROL0: 0x%x\n", | ||
1397 | gpc, tpc, sm, gk20a_readl(g, | ||
1398 | gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset)); | ||
1399 | |||
1400 | gk20a_debug_output(o, | ||
1401 | "NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_DBGR_STATUS0: 0x%x\n", | ||
1402 | gpc, tpc, sm, gk20a_readl(g, | ||
1403 | gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset)); | ||
1404 | } | ||
1405 | |||
1406 | static int gr_gv11b_dump_gr_sm_regs(struct gk20a *g, | ||
1407 | struct gk20a_debug_output *o) | ||
1408 | { | ||
1409 | u32 gpc, tpc, sm, sm_per_tpc; | ||
1410 | u32 gpc_offset, tpc_offset, offset; | ||
1411 | |||
1412 | gk20a_debug_output(o, | ||
1413 | "NV_PGRAPH_PRI_GPCS_TPCS_SMS_HWW_GLOBAL_ESR_REPORT_MASK: 0x%x\n", | ||
1414 | gk20a_readl(g, | ||
1415 | gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r())); | ||
1416 | gk20a_debug_output(o, | ||
1417 | "NV_PGRAPH_PRI_GPCS_TPCS_SMS_HWW_WARP_ESR_REPORT_MASK: 0x%x\n", | ||
1418 | gk20a_readl(g, gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r())); | ||
1419 | gk20a_debug_output(o, | ||
1420 | "NV_PGRAPH_PRI_GPCS_TPCS_SMS_HWW_GLOBAL_ESR: 0x%x\n", | ||
1421 | gk20a_readl(g, gr_gpcs_tpcs_sms_hww_global_esr_r())); | ||
1422 | gk20a_debug_output(o, | ||
1423 | "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_CONTROL0: 0x%x\n", | ||
1424 | gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_control0_r())); | ||
1425 | gk20a_debug_output(o, | ||
1426 | "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_STATUS0: 0x%x\n", | ||
1427 | gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_status0_r())); | ||
1428 | gk20a_debug_output(o, | ||
1429 | "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_BPT_PAUSE_MASK_0: 0x%x\n", | ||
1430 | gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_0_r())); | ||
1431 | gk20a_debug_output(o, | ||
1432 | "NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_BPT_PAUSE_MASK_1: 0x%x\n", | ||
1433 | gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_1_r())); | ||
1434 | |||
1435 | sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); | ||
1436 | for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { | ||
1437 | gpc_offset = gk20a_gr_gpc_offset(g, gpc); | ||
1438 | |||
1439 | for (tpc = 0; tpc < g->gr.tpc_count; tpc++) { | ||
1440 | tpc_offset = gk20a_gr_tpc_offset(g, tpc); | ||
1441 | |||
1442 | for (sm = 0; sm < sm_per_tpc; sm++) { | ||
1443 | offset = gpc_offset + tpc_offset + | ||
1444 | gv11b_gr_sm_offset(g, sm); | ||
1445 | |||
1446 | gr_gv11b_dump_gr_per_sm_regs(g, o, | ||
1447 | gpc, tpc, sm, offset); | ||
1448 | } | ||
1449 | } | ||
1450 | } | ||
1451 | |||
1452 | return 0; | ||
1453 | } | ||
1454 | |||
1455 | int gr_gv11b_dump_gr_status_regs(struct gk20a *g, | ||
1456 | struct gk20a_debug_output *o) | ||
1457 | { | ||
1458 | struct gr_gk20a *gr = &g->gr; | ||
1459 | u32 gr_engine_id; | ||
1460 | |||
1461 | gr_engine_id = gk20a_fifo_get_gr_engine_id(g); | ||
1462 | |||
1463 | gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n", | ||
1464 | gk20a_readl(g, gr_status_r())); | ||
1465 | gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n", | ||
1466 | gk20a_readl(g, gr_status_1_r())); | ||
1467 | gk20a_debug_output(o, "NV_PGRAPH_STATUS2: 0x%x\n", | ||
1468 | gk20a_readl(g, gr_status_2_r())); | ||
1469 | gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n", | ||
1470 | gk20a_readl(g, gr_engine_status_r())); | ||
1471 | gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n", | ||
1472 | gk20a_readl(g, gr_gpfifo_status_r())); | ||
1473 | gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n", | ||
1474 | gk20a_readl(g, gr_gpfifo_ctl_r())); | ||
1475 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n", | ||
1476 | gk20a_readl(g, gr_fecs_host_int_status_r())); | ||
1477 | gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION : 0x%x\n", | ||
1478 | gk20a_readl(g, gr_exception_r())); | ||
1479 | gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n", | ||
1480 | gk20a_readl(g, gr_fecs_intr_r())); | ||
1481 | gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n", | ||
1482 | gk20a_readl(g, fifo_engine_status_r(gr_engine_id))); | ||
1483 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n", | ||
1484 | gk20a_readl(g, gr_activity_0_r())); | ||
1485 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n", | ||
1486 | gk20a_readl(g, gr_activity_1_r())); | ||
1487 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY2: 0x%x\n", | ||
1488 | gk20a_readl(g, gr_activity_2_r())); | ||
1489 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n", | ||
1490 | gk20a_readl(g, gr_activity_4_r())); | ||
1491 | gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n", | ||
1492 | gk20a_readl(g, gr_pri_sked_activity_r())); | ||
1493 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n", | ||
1494 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r())); | ||
1495 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n", | ||
1496 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r())); | ||
1497 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n", | ||
1498 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r())); | ||
1499 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n", | ||
1500 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r())); | ||
1501 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
1502 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r())); | ||
1503 | if (gr->gpc_tpc_count[0] == 2) | ||
1504 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
1505 | gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r())); | ||
1506 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
1507 | gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r())); | ||
1508 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n", | ||
1509 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r())); | ||
1510 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n", | ||
1511 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r())); | ||
1512 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n", | ||
1513 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r())); | ||
1514 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n", | ||
1515 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r())); | ||
1516 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
1517 | gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r())); | ||
1518 | if (gr->gpc_tpc_count[0] == 2) | ||
1519 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
1520 | gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r())); | ||
1521 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
1522 | gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r())); | ||
1523 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n", | ||
1524 | gk20a_readl(g, gr_pri_be0_becs_be_activity0_r())); | ||
1525 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n", | ||
1526 | gk20a_readl(g, gr_pri_be1_becs_be_activity0_r())); | ||
1527 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n", | ||
1528 | gk20a_readl(g, gr_pri_bes_becs_be_activity0_r())); | ||
1529 | gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n", | ||
1530 | gk20a_readl(g, gr_pri_ds_mpipe_status_r())); | ||
1531 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n", | ||
1532 | gk20a_readl(g, gr_fe_go_idle_timeout_r())); | ||
1533 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n", | ||
1534 | gk20a_readl(g, gr_pri_fe_go_idle_info_r())); | ||
1535 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n", | ||
1536 | gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r())); | ||
1537 | gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n", | ||
1538 | gk20a_readl(g, gr_cwd_fs_r())); | ||
1539 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS(0): 0x%x\n", | ||
1540 | gk20a_readl(g, gr_fe_tpc_fs_r(0))); | ||
1541 | gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID: 0x%x\n", | ||
1542 | gk20a_readl(g, gr_cwd_gpc_tpc_id_r(0))); | ||
1543 | gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n", | ||
1544 | gk20a_readl(g, gr_cwd_sm_id_r(0))); | ||
1545 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n", | ||
1546 | gk20a_readl(g, gr_fecs_ctxsw_status_fe_0_r())); | ||
1547 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n", | ||
1548 | gk20a_readl(g, gr_fecs_ctxsw_status_1_r())); | ||
1549 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n", | ||
1550 | gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r())); | ||
1551 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n", | ||
1552 | gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r())); | ||
1553 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n", | ||
1554 | gk20a_readl(g, gr_fecs_ctxsw_idlestate_r())); | ||
1555 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n", | ||
1556 | gk20a_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r())); | ||
1557 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n", | ||
1558 | gk20a_readl(g, gr_fecs_current_ctx_r())); | ||
1559 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n", | ||
1560 | gk20a_readl(g, gr_fecs_new_ctx_r())); | ||
1561 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_ENABLE : 0x%x\n", | ||
1562 | gk20a_readl(g, gr_fecs_host_int_enable_r())); | ||
1563 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n", | ||
1564 | gk20a_readl(g, gr_fecs_host_int_status_r())); | ||
1565 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n", | ||
1566 | gk20a_readl(g, gr_pri_be0_crop_status1_r())); | ||
1567 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n", | ||
1568 | gk20a_readl(g, gr_pri_bes_crop_status1_r())); | ||
1569 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS : 0x%x\n", | ||
1570 | gk20a_readl(g, gr_pri_be0_zrop_status_r())); | ||
1571 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS2 : 0x%x\n", | ||
1572 | gk20a_readl(g, gr_pri_be0_zrop_status2_r())); | ||
1573 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS : 0x%x\n", | ||
1574 | gk20a_readl(g, gr_pri_bes_zrop_status_r())); | ||
1575 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n", | ||
1576 | gk20a_readl(g, gr_pri_bes_zrop_status2_r())); | ||
1577 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION: 0x%x\n", | ||
1578 | gk20a_readl(g, gr_pri_be0_becs_be_exception_r())); | ||
1579 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN: 0x%x\n", | ||
1580 | gk20a_readl(g, gr_pri_be0_becs_be_exception_en_r())); | ||
1581 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n", | ||
1582 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r())); | ||
1583 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n", | ||
1584 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r())); | ||
1585 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n", | ||
1586 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r())); | ||
1587 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n", | ||
1588 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r())); | ||
1589 | |||
1590 | gr_gv11b_dump_gr_sm_regs(g, o); | ||
1591 | |||
1592 | return 0; | ||
1593 | } | ||
1594 | |||
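| /* Walk the 3-bit status fields packed into a GR activity register and | ||
| * report idle only if every field reads EMPTY or PREEMPTED. | ||
| */ | ||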
1595 | static bool gr_activity_empty_or_preempted(u32 val) | ||
1596 | { | ||
1597 | while (val) { | ||
1598 | u32 v = val & 7; | ||
1599 | if (v != gr_activity_4_gpc0_empty_v() && | ||
1600 | v != gr_activity_4_gpc0_preempted_v()) | ||
1601 | return false; | ||
1602 | val >>= 3; | ||
1603 | } | ||
1604 | |||
1605 | return true; | ||
1606 | } | ||
1607 | |||
1608 | int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms, | ||
1609 | u32 expect_delay) | ||
1610 | { | ||
1611 | u32 delay = expect_delay; | ||
1612 | bool gr_enabled; | ||
1613 | bool ctxsw_active; | ||
1614 | bool gr_busy; | ||
1615 | u32 gr_status; | ||
1616 | u32 activity0, activity1, activity2, activity4; | ||
1617 | struct nvgpu_timeout timeout; | ||
1618 | |||
1619 | gk20a_dbg_fn(""); | ||
1620 | |||
1621 | nvgpu_timeout_init(g, &timeout, duration_ms, NVGPU_TIMER_CPU_TIMER); | ||
1622 | |||
1623 | do { | ||
1624 | /* fmodel: host gets fifo_engine_status(gr) from gr | ||
1625 | only when gr_status is read */ | ||
1626 | gr_status = gk20a_readl(g, gr_status_r()); | ||
1627 | |||
1628 | gr_enabled = gk20a_readl(g, mc_enable_r()) & | ||
1629 | mc_enable_pgraph_enabled_f(); | ||
1630 | |||
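| /* Bit 7 of NV_PGRAPH_STATUS is used here as the "context switch | ||
| * active" flag, checked as a raw mask below. | ||
| */ | ||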
1631 | ctxsw_active = gr_status & 1<<7; | ||
1632 | |||
1633 | activity0 = gk20a_readl(g, gr_activity_0_r()); | ||
1634 | activity1 = gk20a_readl(g, gr_activity_1_r()); | ||
1635 | activity2 = gk20a_readl(g, gr_activity_2_r()); | ||
1636 | activity4 = gk20a_readl(g, gr_activity_4_r()); | ||
1637 | |||
1638 | gr_busy = !(gr_activity_empty_or_preempted(activity0) && | ||
1639 | gr_activity_empty_or_preempted(activity1) && | ||
1640 | activity2 == 0 && | ||
1641 | gr_activity_empty_or_preempted(activity4)); | ||
1642 | |||
1643 | if (!gr_enabled || (!gr_busy && !ctxsw_active)) { | ||
1644 | gk20a_dbg_fn("done"); | ||
1645 | return 0; | ||
1646 | } | ||
1647 | |||
1648 | usleep_range(delay, delay * 2); | ||
1649 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | ||
1650 | |||
1651 | } while (!nvgpu_timeout_expired(&timeout)); | ||
1652 | |||
1653 | nvgpu_err(g, | ||
1654 | "timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x", | ||
1655 | ctxsw_active, gr_busy, activity0, activity1, activity2, activity4); | ||
1656 | |||
1657 | return -EAGAIN; | ||
1658 | } | ||
1659 | |||
1660 | void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, | ||
1661 | struct channel_ctx_gk20a *ch_ctx, | ||
1662 | u64 addr, bool patch) | ||
1663 | { | ||
1664 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | ||
1665 | int attrBufferSize; | ||
1666 | |||
1667 | if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) | ||
1668 | attrBufferSize = gr_ctx->t18x.betacb_ctxsw_buffer.size; | ||
1669 | else | ||
1670 | attrBufferSize = g->ops.gr.calc_global_ctx_buffer_size(g); | ||
1671 | |||
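| /* Convert the attribute CB size to the 128B units expected by the | ||
| * TEX_RM_CB_1 size field written below. | ||
| */ | ||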
1672 | attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); | ||
1673 | |||
1674 | gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch); | ||
1675 | |||
1676 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), | ||
1677 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | | ||
1678 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); | ||
1679 | |||
1680 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), | ||
1681 | gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); | ||
1682 | |||
1683 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), | ||
1684 | gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | | ||
1685 | gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); | ||
1686 | } | ||
1687 | |||
1688 | void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | ||
1689 | { | ||
1690 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) | ||
1691 | tegra_fuse_writel(0x1, FUSE_FUSEBYPASS_0); | ||
1692 | tegra_fuse_writel(0x0, FUSE_WRITE_ACCESS_SW_0); | ||
1693 | #else | ||
1694 | tegra_fuse_control_write(0x1, FUSE_FUSEBYPASS_0); | ||
1695 | tegra_fuse_control_write(0x0, FUSE_WRITE_ACCESS_SW_0); | ||
1696 | #endif | ||
1697 | |||
1698 | if (g->gr.gpc_tpc_mask[gpc_index] == 0x1) | ||
1699 | tegra_fuse_writel(0x2, FUSE_OPT_GPU_TPC0_DISABLE_0); | ||
1700 | else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2) | ||
1701 | tegra_fuse_writel(0x1, FUSE_OPT_GPU_TPC0_DISABLE_0); | ||
1702 | else | ||
1703 | tegra_fuse_writel(0x0, FUSE_OPT_GPU_TPC0_DISABLE_0); | ||
1704 | } | ||
1705 | |||
1706 | void gr_gv11b_get_access_map(struct gk20a *g, | ||
1707 | u32 **whitelist, int *num_entries) | ||
1708 | { | ||
1709 | static u32 wl_addr_gv11b[] = { | ||
1710 | /* this list must be sorted (low to high) */ | ||
1711 | 0x404468, /* gr_pri_mme_max_instructions */ | ||
1712 | 0x418300, /* gr_pri_gpcs_rasterarb_line_class */ | ||
1713 | 0x418800, /* gr_pri_gpcs_setup_debug */ | ||
1714 | 0x418e00, /* gr_pri_gpcs_swdx_config */ | ||
1715 | 0x418e40, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ | ||
1716 | 0x418e44, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ | ||
1717 | 0x418e48, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ | ||
1718 | 0x418e4c, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ | ||
1719 | 0x418e50, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ | ||
1720 | 0x418e58, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1721 | 0x418e5c, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1722 | 0x418e60, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1723 | 0x418e64, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1724 | 0x418e68, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1725 | 0x418e6c, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1726 | 0x418e70, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1727 | 0x418e74, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1728 | 0x418e78, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1729 | 0x418e7c, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1730 | 0x418e80, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1731 | 0x418e84, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1732 | 0x418e88, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1733 | 0x418e8c, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1734 | 0x418e90, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1735 | 0x418e94, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1736 | 0x419864, /* gr_pri_gpcs_tpcs_pe_l2_evict_policy */ | ||
1737 | 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */ | ||
1738 | 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */ | ||
1739 | 0x419e84, /* gr_pri_gpcs_tpcs_sms_dbgr_control0 */ | ||
1740 | 0x419ba4, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */ | ||
1741 | }; | ||
1742 | |||
1743 | *whitelist = wl_addr_gv11b; | ||
1744 | *num_entries = ARRAY_SIZE(wl_addr_gv11b); | ||
1745 | } | ||
1746 | |||
1747 | /* @brief Pre-process SM exceptions to determine whether we clear them here or not. | ||
1748 | * | ||
1749 | * As on Pascal, if we are in CILP preemption mode, preempt the channel and handle errors with special processing. | ||
1750 | */ | ||
1751 | int gr_gv11b_pre_process_sm_exception(struct gk20a *g, | ||
1752 | u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr, | ||
1753 | bool sm_debugger_attached, struct channel_gk20a *fault_ch, | ||
1754 | bool *early_exit, bool *ignore_debugger) | ||
1755 | { | ||
1756 | int ret; | ||
1757 | bool cilp_enabled = false; | ||
1758 | u32 global_mask = 0, dbgr_control0, global_esr_copy; | ||
1759 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + | ||
1760 | gk20a_gr_tpc_offset(g, tpc) + | ||
1761 | gv11b_gr_sm_offset(g, sm); | ||
1762 | |||
1763 | *early_exit = false; | ||
1764 | *ignore_debugger = false; | ||
1765 | |||
1766 | if (fault_ch) | ||
1767 | cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == | ||
1768 | NVGPU_PREEMPTION_MODE_COMPUTE_CILP); | ||
1769 | |||
1770 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
1771 | "SM Exception received on gpc %d tpc %d sm %d = 0x%08x", | ||
1772 | gpc, tpc, sm, global_esr); | ||
1773 | |||
1774 | if (cilp_enabled && sm_debugger_attached) { | ||
1775 | if (global_esr & gr_gpc0_tpc0_sm0_hww_global_esr_bpt_int_pending_f()) | ||
1776 | gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset, | ||
1777 | gr_gpc0_tpc0_sm0_hww_global_esr_bpt_int_pending_f()); | ||
1778 | |||
1779 | if (global_esr & gr_gpc0_tpc0_sm0_hww_global_esr_single_step_complete_pending_f()) | ||
1780 | gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset, | ||
1781 | gr_gpc0_tpc0_sm0_hww_global_esr_single_step_complete_pending_f()); | ||
1782 | |||
1783 | global_mask = gr_gpc0_tpc0_sm0_hww_global_esr_multiple_warp_errors_pending_f() | | ||
1784 | gr_gpc0_tpc0_sm0_hww_global_esr_bpt_pause_pending_f(); | ||
1785 | |||
1786 | if (warp_esr != 0 || (global_esr & global_mask) != 0) { | ||
1787 | *ignore_debugger = true; | ||
1788 | |||
1789 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
1790 | "CILP: starting wait for LOCKED_DOWN on " | ||
1791 | "gpc %d tpc %d sm %d", | ||
1792 | gpc, tpc, sm); | ||
1793 | |||
1794 | if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch)) { | ||
1795 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
1796 | "CILP: Broadcasting STOP_TRIGGER from " | ||
1797 | "gpc %d tpc %d sm %d", | ||
1798 | gpc, tpc, sm); | ||
1799 | g->ops.gr.suspend_all_sms(g, | ||
1800 | global_mask, false); | ||
1801 | |||
1802 | gk20a_dbg_gpu_clear_broadcast_stop_trigger(fault_ch); | ||
1803 | } else { | ||
1804 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
1805 | "CILP: STOP_TRIGGER from " | ||
1806 | "gpc %d tpc %d sm %d", | ||
1807 | gpc, tpc, sm); | ||
1808 | g->ops.gr.suspend_single_sm(g, | ||
1809 | gpc, tpc, sm, global_mask, true); | ||
1810 | } | ||
1811 | |||
1812 | /* reset the HWW errors after locking down */ | ||
1813 | global_esr_copy = g->ops.gr.get_sm_hww_global_esr(g, | ||
1814 | gpc, tpc, sm); | ||
1815 | g->ops.gr.clear_sm_hww(g, | ||
1816 | gpc, tpc, sm, global_esr_copy); | ||
1817 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
1818 | "CILP: HWWs cleared for " | ||
1819 | "gpc %d tpc %d sm %d", | ||
1820 | gpc, tpc, sm); | ||
1821 | |||
1822 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n"); | ||
1823 | ret = gr_gp10b_set_cilp_preempt_pending(g, fault_ch); | ||
1824 | if (ret) { | ||
1825 | nvgpu_err(g, "CILP: error while setting CILP preempt pending!"); | ||
1826 | return ret; | ||
1827 | } | ||
1828 | |||
1829 | dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); | ||
1830 | if (dbgr_control0 & gr_gpc0_tpc0_sm0_dbgr_control0_single_step_mode_enable_f()) { | ||
1831 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
1832 | "CILP: clearing SINGLE_STEP_MODE " | ||
1833 | "before resume for gpc %d tpc %d sm %d", | ||
1834 | gpc, tpc, sm); | ||
1835 | dbgr_control0 = set_field(dbgr_control0, | ||
1836 | gr_gpc0_tpc0_sm0_dbgr_control0_single_step_mode_m(), | ||
1837 | gr_gpc0_tpc0_sm0_dbgr_control0_single_step_mode_disable_f()); | ||
1838 | gk20a_writel(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset, dbgr_control0); | ||
1839 | } | ||
1840 | |||
1841 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
1842 | "CILP: resume for gpc %d tpc %d sm %d", | ||
1843 | gpc, tpc, sm); | ||
1844 | g->ops.gr.resume_single_sm(g, gpc, tpc, sm); | ||
1845 | |||
1846 | *ignore_debugger = true; | ||
1847 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
1848 | "CILP: All done on gpc %d, tpc %d sm %d", | ||
1849 | gpc, tpc, sm); | ||
1850 | } | ||
1851 | |||
1852 | *early_exit = true; | ||
1853 | } | ||
1854 | return 0; | ||
1855 | } | ||
1856 | |||
1857 | static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) | ||
1858 | { | ||
1859 | u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; | ||
1860 | u32 corrected_delta, uncorrected_delta; | ||
1861 | u32 corrected_overflow, uncorrected_overflow; | ||
1862 | |||
1863 | if (intr & (gr_fecs_host_int_status_ecc_uncorrected_m() | | ||
1864 | gr_fecs_host_int_status_ecc_corrected_m())) { | ||
1865 | ecc_status = gk20a_readl(g, gr_fecs_falcon_ecc_status_r()); | ||
1866 | ecc_addr = gk20a_readl(g, | ||
1867 | gr_fecs_falcon_ecc_address_r()); | ||
1868 | corrected_cnt = gk20a_readl(g, | ||
1869 | gr_fecs_falcon_ecc_corrected_err_count_r()); | ||
1870 | uncorrected_cnt = gk20a_readl(g, | ||
1871 | gr_fecs_falcon_ecc_uncorrected_err_count_r()); | ||
1872 | |||
1873 | corrected_delta = | ||
1874 | gr_fecs_falcon_ecc_corrected_err_count_total_v( | ||
1875 | corrected_cnt); | ||
1876 | uncorrected_delta = | ||
1877 | gr_fecs_falcon_ecc_uncorrected_err_count_total_v( | ||
1878 | uncorrected_cnt); | ||
1879 | |||
1880 | corrected_overflow = ecc_status & | ||
1881 | gr_fecs_falcon_ecc_status_corrected_err_total_counter_overflow_m(); | ||
1882 | uncorrected_overflow = ecc_status & | ||
1883 | gr_fecs_falcon_ecc_status_uncorrected_err_total_counter_overflow_m(); | ||
1884 | |||
1885 | /* clear the interrupt */ | ||
1886 | if ((corrected_delta > 0) || corrected_overflow) | ||
1887 | gk20a_writel(g, | ||
1888 | gr_fecs_falcon_ecc_corrected_err_count_r(), 0); | ||
1889 | if ((uncorrected_delta > 0) || uncorrected_overflow) | ||
1890 | gk20a_writel(g, | ||
1891 | gr_fecs_falcon_ecc_uncorrected_err_count_r(), | ||
1892 | 0); | ||
1893 | |||
1894 | |||
1895 | /* clear the interrupt */ | ||
1896 | gk20a_writel(g, gr_fecs_falcon_ecc_uncorrected_err_count_r(), | ||
1897 | 0); | ||
1898 | gk20a_writel(g, gr_fecs_falcon_ecc_corrected_err_count_r(), 0); | ||
1899 | |||
1900 | /* clear the interrupt */ | ||
1901 | gk20a_writel(g, gr_fecs_falcon_ecc_status_r(), | ||
1902 | gr_fecs_falcon_ecc_status_reset_task_f()); | ||
1903 | |||
1904 | g->ecc.gr.t19x.fecs_corrected_err_count.counters[0] += | ||
1905 | corrected_delta; | ||
1906 | g->ecc.gr.t19x.fecs_uncorrected_err_count.counters[0] += | ||
1907 | uncorrected_delta; | ||
1908 | |||
1909 | nvgpu_log(g, gpu_dbg_intr, | ||
1910 | "fecs ecc interrupt intr: 0x%x", intr); | ||
1911 | |||
1912 | if (ecc_status & | ||
1913 | gr_fecs_falcon_ecc_status_corrected_err_imem_m()) | ||
1914 | nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); | ||
1915 | if (ecc_status & | ||
1916 | gr_fecs_falcon_ecc_status_uncorrected_err_imem_m()) | ||
1917 | nvgpu_log(g, gpu_dbg_intr, | ||
1918 | "imem ecc error uncorrected"); | ||
1919 | if (ecc_status & | ||
1920 | gr_fecs_falcon_ecc_status_corrected_err_dmem_m()) | ||
1921 | nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected"); | ||
1922 | if (ecc_status & | ||
1923 | gr_fecs_falcon_ecc_status_uncorrected_err_dmem_m()) | ||
1924 | nvgpu_log(g, gpu_dbg_intr, | ||
1925 | "dmem ecc error uncorrected"); | ||
1926 | if (corrected_overflow || uncorrected_overflow) | ||
1927 | nvgpu_info(g, "fecs ecc counter overflow!"); | ||
1928 | |||
1929 | nvgpu_log(g, gpu_dbg_intr, | ||
1930 | "ecc error row address: 0x%x", | ||
1931 | gr_fecs_falcon_ecc_address_row_address_v(ecc_addr)); | ||
1932 | |||
1933 | nvgpu_log(g, gpu_dbg_intr, | ||
1934 | "ecc error count corrected: %d, uncorrected %d", | ||
1935 | g->ecc.gr.t19x.fecs_corrected_err_count.counters[0], | ||
1936 | g->ecc.gr.t19x.fecs_uncorrected_err_count.counters[0]); | ||
1937 | } | ||
1938 | } | ||
1939 | |||
1940 | int gr_gv11b_handle_fecs_error(struct gk20a *g, | ||
1941 | struct channel_gk20a *__ch, | ||
1942 | struct gr_gk20a_isr_data *isr_data) | ||
1943 | { | ||
1944 | u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r()); | ||
1945 | int ret; | ||
1946 | |||
1947 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); | ||
1948 | |||
1949 | ret = gr_gp10b_handle_fecs_error(g, __ch, isr_data); | ||
1950 | |||
1951 | /* Handle ECC errors */ | ||
1952 | gr_gv11b_handle_fecs_ecc_error(g, gr_fecs_intr); | ||
1953 | |||
1954 | return ret; | ||
1955 | } | ||
1956 | |||
1957 | int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) | ||
1958 | { | ||
1959 | u32 map; | ||
1960 | u32 i, j, mapregs; | ||
1961 | u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); | ||
1962 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, | ||
1963 | GPU_LIT_NUM_TPC_PER_GPC); | ||
1964 | |||
1965 | gk20a_dbg_fn(""); | ||
1966 | |||
1967 | if (!gr->map_tiles) | ||
1968 | return -1; | ||
1969 | |||
1970 | gk20a_writel(g, gr_crstr_map_table_cfg_r(), | ||
1971 | gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) | | ||
1972 | gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count)); | ||
1973 | |||
1974 | /* 6 tpc can be stored in one map register */ | ||
1975 | mapregs = (num_gpcs * num_tpc_per_gpc + 5) / 6; | ||
1976 | |||
1977 | for (i = 0, j = 0; i < mapregs; i++, j = j + 6) { | ||
1978 | map = gr_crstr_gpc_map_tile0_f(gr->map_tiles[j]) | | ||
1979 | gr_crstr_gpc_map_tile1_f(gr->map_tiles[j + 1]) | | ||
1980 | gr_crstr_gpc_map_tile2_f(gr->map_tiles[j + 2]) | | ||
1981 | gr_crstr_gpc_map_tile3_f(gr->map_tiles[j + 3]) | | ||
1982 | gr_crstr_gpc_map_tile4_f(gr->map_tiles[j + 4]) | | ||
1983 | gr_crstr_gpc_map_tile5_f(gr->map_tiles[j + 5]); | ||
1984 | |||
1985 | gk20a_writel(g, gr_crstr_gpc_map_r(i), map); | ||
1986 | gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map_r(i), map); | ||
1987 | gk20a_writel(g, gr_rstr2d_gpc_map_r(i), map); | ||
1988 | } | ||
1989 | |||
1990 | gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(), | ||
1991 | gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) | | ||
1992 | gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count)); | ||
1993 | |||
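| /* Each WWDX map-table coefficient register packs four (2^j mod | ||
| * tpc_count) values for consecutive j. | ||
| */ | ||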
1994 | for (i = 0, j = 1; i < gr_ppcs_wwdx_map_table_cfg_coeff__size_1_v(); | ||
1995 | i++, j = j + 4) { | ||
1996 | gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_coeff_r(i), | ||
1997 | gr_ppcs_wwdx_map_table_cfg_coeff_0_mod_value_f( | ||
1998 | ((1 << j) % gr->tpc_count)) | | ||
1999 | gr_ppcs_wwdx_map_table_cfg_coeff_1_mod_value_f( | ||
2000 | ((1 << (j + 1)) % gr->tpc_count)) | | ||
2001 | gr_ppcs_wwdx_map_table_cfg_coeff_2_mod_value_f( | ||
2002 | ((1 << (j + 2)) % gr->tpc_count)) | | ||
2003 | gr_ppcs_wwdx_map_table_cfg_coeff_3_mod_value_f( | ||
2004 | ((1 << (j + 3)) % gr->tpc_count))); | ||
2005 | } | ||
2006 | |||
2007 | gk20a_writel(g, gr_rstr2d_map_table_cfg_r(), | ||
2008 | gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) | | ||
2009 | gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count)); | ||
2010 | |||
2011 | return 0; | ||
2012 | } | ||
2013 | |||
2014 | static int gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) | ||
2015 | { | ||
2016 | struct av_list_gk20a *sw_veid_bundle_init = | ||
2017 | &g->gr.ctx_vars.sw_veid_bundle_init; | ||
2018 | u32 j; | ||
2019 | u32 num_subctx, err = 0; | ||
2020 | |||
2021 | num_subctx = g->fifo.t19x.max_subctx_count; | ||
2022 | |||
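| /* Replay the same bundle address once per subcontext (VEID), waiting | ||
| * for FE to go idle after each write. | ||
| */ | ||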
2023 | for (j = 0; j < num_subctx; j++) { | ||
2024 | nvgpu_log_fn(g, "write bundle_address_r for subctx: %d", j); | ||
2025 | gk20a_writel(g, gr_pipe_bundle_address_r(), | ||
2026 | sw_veid_bundle_init->l[index].addr | | ||
2027 | gr_pipe_bundle_address_veid_f(j)); | ||
2028 | |||
2029 | err = gr_gk20a_wait_fe_idle(g, gk20a_get_gr_idle_timeout(g), | ||
2030 | GR_IDLE_CHECK_DEFAULT); | ||
2031 | } | ||
2032 | return err; | ||
2033 | } | ||
2034 | |||
2035 | int gr_gv11b_init_sw_veid_bundle(struct gk20a *g) | ||
2036 | { | ||
2037 | struct av_list_gk20a *sw_veid_bundle_init = | ||
2038 | &g->gr.ctx_vars.sw_veid_bundle_init; | ||
2039 | u32 i; | ||
2040 | u32 last_bundle_data = 0; | ||
2041 | u32 err = 0; | ||
2042 | |||
2043 | for (i = 0; i < sw_veid_bundle_init->count; i++) { | ||
2044 | nvgpu_log_fn(g, "veid bundle count: %d", i); | ||
2045 | |||
2046 | if (i == 0 || last_bundle_data != | ||
2047 | sw_veid_bundle_init->l[i].value) { | ||
2048 | gk20a_writel(g, gr_pipe_bundle_data_r(), | ||
2049 | sw_veid_bundle_init->l[i].value); | ||
2050 | last_bundle_data = sw_veid_bundle_init->l[i].value; | ||
2051 | nvgpu_log_fn(g, "last_bundle_data : 0x%08x", | ||
2052 | last_bundle_data); | ||
2053 | } | ||
2054 | |||
2055 | if (gr_pipe_bundle_address_value_v( | ||
2056 | sw_veid_bundle_init->l[i].addr) == GR_GO_IDLE_BUNDLE) { | ||
2057 | nvgpu_log_fn(g, "go idle bundle"); | ||
2058 | gk20a_writel(g, gr_pipe_bundle_address_r(), | ||
2059 | sw_veid_bundle_init->l[i].addr); | ||
2060 | err |= gr_gk20a_wait_idle(g, | ||
2061 | gk20a_get_gr_idle_timeout(g), | ||
2062 | GR_IDLE_CHECK_DEFAULT); | ||
2063 | } else | ||
2064 | err = gv11b_write_bundle_veid_state(g, i); | ||
2065 | |||
2066 | if (err) { | ||
2067 | nvgpu_err(g, "failed to init sw veid bundle"); | ||
2068 | break; | ||
2069 | } | ||
2070 | } | ||
2071 | return err; | ||
2072 | } | ||
2073 | |||
2074 | void gr_gv11b_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, | ||
2075 | u32 *zcull_map_tiles) | ||
2076 | { | ||
2077 | u32 val, i, j; | ||
2078 | |||
2079 | gk20a_dbg_fn(""); | ||
2080 | |||
2081 | for (i = 0, j = 0; i < (zcull_num_entries / 8); i++, j += 8) { | ||
2082 | val = | ||
2083 | gr_gpcs_zcull_sm_in_gpc_number_map_tile_0_f( | ||
2084 | zcull_map_tiles[j+0]) | | ||
2085 | gr_gpcs_zcull_sm_in_gpc_number_map_tile_1_f( | ||
2086 | zcull_map_tiles[j+1]) | | ||
2087 | gr_gpcs_zcull_sm_in_gpc_number_map_tile_2_f( | ||
2088 | zcull_map_tiles[j+2]) | | ||
2089 | gr_gpcs_zcull_sm_in_gpc_number_map_tile_3_f( | ||
2090 | zcull_map_tiles[j+3]) | | ||
2091 | gr_gpcs_zcull_sm_in_gpc_number_map_tile_4_f( | ||
2092 | zcull_map_tiles[j+4]) | | ||
2093 | gr_gpcs_zcull_sm_in_gpc_number_map_tile_5_f( | ||
2094 | zcull_map_tiles[j+5]) | | ||
2095 | gr_gpcs_zcull_sm_in_gpc_number_map_tile_6_f( | ||
2096 | zcull_map_tiles[j+6]) | | ||
2097 | gr_gpcs_zcull_sm_in_gpc_number_map_tile_7_f( | ||
2098 | zcull_map_tiles[j+7]); | ||
2099 | |||
2100 | gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map_r(i), val); | ||
2101 | } | ||
2102 | } | ||
2103 | |||
2104 | void gr_gv11b_detect_sm_arch(struct gk20a *g) | ||
2105 | { | ||
2106 | u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); | ||
2107 | |||
2108 | g->params.sm_arch_spa_version = | ||
2109 | gr_gpc0_tpc0_sm_arch_spa_version_v(v); | ||
2110 | g->params.sm_arch_sm_version = | ||
2111 | gr_gpc0_tpc0_sm_arch_sm_version_v(v); | ||
2112 | g->params.sm_arch_warp_count = | ||
2113 | gr_gpc0_tpc0_sm_arch_warp_count_v(v); | ||
2114 | } | ||
2115 | |||
2116 | void gr_gv11b_program_sm_id_numbering(struct gk20a *g, | ||
2117 | u32 gpc, u32 tpc, u32 smid) | ||
2118 | { | ||
2119 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
2120 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
2121 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
2122 | u32 gpc_offset = gpc_stride * gpc; | ||
2123 | u32 tpc_offset = tpc_in_gpc_stride * tpc; | ||
2124 | u32 global_tpc_index = g->gr.sm_to_cluster[smid].global_tpc_index; | ||
2125 | |||
2126 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
2127 | gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index)); | ||
2128 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, | ||
2129 | gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index)); | ||
2130 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
2131 | gr_gpc0_tpc0_pe_cfg_smid_value_f(global_tpc_index)); | ||
2132 | } | ||
2133 | |||
2134 | int gr_gv11b_load_smid_config(struct gk20a *g) | ||
2135 | { | ||
2136 | u32 *tpc_sm_id; | ||
2137 | u32 i, j; | ||
2138 | u32 tpc_index, gpc_index, tpc_id; | ||
2139 | u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); | ||
2140 | int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); | ||
2141 | |||
2142 | tpc_sm_id = nvgpu_kcalloc(g, gr_cwd_sm_id__size_1_v(), sizeof(u32)); | ||
2143 | if (!tpc_sm_id) | ||
2144 | return -ENOMEM; | ||
2145 | |||
2146 | /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs. */ | ||
2147 | for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) { | ||
2148 | u32 reg = 0; | ||
2149 | u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + | ||
2150 | gr_cwd_gpc_tpc_id_tpc0_s(); | ||
2151 | |||
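| /* Each of the four per-register entries is (gpc id width + tpc id | ||
| * width) bits wide, hence bit_stride. | ||
| */ | ||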
2152 | for (j = 0; j < 4; j++) { | ||
2153 | u32 sm_id; | ||
2154 | u32 bits; | ||
2155 | |||
2156 | tpc_id = (i << 2) + j; | ||
2157 | sm_id = tpc_id * sm_per_tpc; | ||
2158 | |||
2159 | if (sm_id >= g->gr.no_of_sm) | ||
2160 | break; | ||
2161 | |||
2162 | gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
2163 | tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
2164 | |||
2165 | bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) | | ||
2166 | gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); | ||
2167 | reg |= bits << (j * bit_stride); | ||
2168 | |||
2169 | tpc_sm_id[gpc_index + (num_gpcs * ((tpc_index & 4) | ||
2170 | >> 2))] |= tpc_id << tpc_index * bit_stride; | ||
2171 | } | ||
2172 | gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg); | ||
2173 | } | ||
2174 | |||
2175 | for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) | ||
2176 | gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); | ||
2177 | nvgpu_kfree(g, tpc_sm_id); | ||
2178 | |||
2179 | return 0; | ||
2180 | } | ||
2181 | |||
2182 | int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) | ||
2183 | { | ||
2184 | u32 addr_lo; | ||
2185 | u32 addr_hi; | ||
2186 | struct ctx_header_desc *ctx; | ||
2187 | int err; | ||
2188 | |||
2189 | gk20a_dbg_fn(""); | ||
2190 | |||
2191 | err = gv11b_alloc_subctx_header(c); | ||
2192 | if (err) | ||
2193 | return err; | ||
2194 | |||
2195 | err = gv11b_update_subctx_header(c, gpu_va); | ||
2196 | if (err) | ||
2197 | return err; | ||
2198 | |||
2199 | ctx = &c->ch_ctx.ctx_header; | ||
2200 | addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v(); | ||
2201 | addr_hi = u64_hi32(ctx->mem.gpu_va); | ||
2202 | |||
2203 | /* point this address to engine_wfi_ptr */ | ||
2204 | nvgpu_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(), | ||
2205 | ram_in_engine_cs_wfi_v() | | ||
2206 | ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) | | ||
2207 | ram_in_engine_wfi_ptr_lo_f(addr_lo)); | ||
2208 | |||
2209 | nvgpu_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(), | ||
2210 | ram_in_engine_wfi_ptr_hi_f(addr_hi)); | ||
2211 | |||
2212 | return 0; | ||
2213 | } | ||
2214 | |||
2215 | |||
2216 | |||
2217 | int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) | ||
2218 | { | ||
2219 | struct channel_ctx_gk20a *ch_ctx = NULL; | ||
2220 | u32 pd_ab_dist_cfg0; | ||
2221 | u32 ds_debug; | ||
2222 | u32 mpc_vtg_debug; | ||
2223 | u32 pe_vaf; | ||
2224 | u32 pe_vsc_vpc; | ||
2225 | |||
2226 | gk20a_dbg_fn(""); | ||
2227 | |||
2228 | pd_ab_dist_cfg0 = gk20a_readl(g, gr_pd_ab_dist_cfg0_r()); | ||
2229 | ds_debug = gk20a_readl(g, gr_ds_debug_r()); | ||
2230 | mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r()); | ||
2231 | |||
2232 | pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r()); | ||
2233 | pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r()); | ||
2234 | |||
2235 | pe_vaf = gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f() | pe_vaf; | ||
2236 | pe_vsc_vpc = gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f() | | ||
2237 | pe_vsc_vpc; | ||
2238 | pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_en_f() | | ||
2239 | pd_ab_dist_cfg0; | ||
2240 | ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug; | ||
2241 | mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | | ||
2242 | mpc_vtg_debug; | ||
2243 | |||
2244 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, | ||
2245 | false); | ||
2246 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), | ||
2247 | pe_vsc_vpc, false); | ||
2248 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), | ||
2249 | pd_ab_dist_cfg0, false); | ||
2250 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); | ||
2251 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), | ||
2252 | mpc_vtg_debug, false); | ||
2253 | |||
2254 | return 0; | ||
2255 | } | ||
2256 | |||
2257 | void gr_gv11b_write_zcull_ptr(struct gk20a *g, | ||
2258 | struct nvgpu_mem *mem, u64 gpu_va) | ||
2259 | { | ||
2260 | u32 va_lo, va_hi; | ||
2261 | |||
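| /* The zcull pointer is stored in the context image in 256B units, so | ||
| * drop the low 8 bits before splitting into lo/hi words. | ||
| */ | ||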
2262 | gpu_va = gpu_va >> 8; | ||
2263 | va_lo = u64_lo32(gpu_va); | ||
2264 | va_hi = u64_hi32(gpu_va); | ||
2265 | nvgpu_mem_wr(g, mem, | ||
2266 | ctxsw_prog_main_image_zcull_ptr_o(), va_lo); | ||
2267 | nvgpu_mem_wr(g, mem, | ||
2268 | ctxsw_prog_main_image_zcull_ptr_hi_o(), va_hi); | ||
2269 | } | ||
2270 | |||
2271 | |||
2272 | void gr_gv11b_write_pm_ptr(struct gk20a *g, | ||
2273 | struct nvgpu_mem *mem, u64 gpu_va) | ||
2274 | { | ||
2275 | u32 va_lo, va_hi; | ||
2276 | |||
2277 | gpu_va = gpu_va >> 8; | ||
2278 | va_lo = u64_lo32(gpu_va); | ||
2279 | va_hi = u64_hi32(gpu_va); | ||
2280 | nvgpu_mem_wr(g, mem, | ||
2281 | ctxsw_prog_main_image_pm_ptr_o(), va_lo); | ||
2282 | nvgpu_mem_wr(g, mem, | ||
2283 | ctxsw_prog_main_image_pm_ptr_hi_o(), va_hi); | ||
2284 | } | ||
2285 | |||
2286 | void gr_gv11b_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) | ||
2287 | { | ||
2288 | u32 gate_ctrl; | ||
2289 | |||
2290 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_ELCG)) | ||
2291 | return; | ||
2292 | |||
2293 | gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine)); | ||
2294 | |||
2295 | switch (mode) { | ||
2296 | case ELCG_RUN: | ||
2297 | gate_ctrl = set_field(gate_ctrl, | ||
2298 | therm_gate_ctrl_eng_clk_m(), | ||
2299 | therm_gate_ctrl_eng_clk_run_f()); | ||
2300 | gate_ctrl = set_field(gate_ctrl, | ||
2301 | therm_gate_ctrl_idle_holdoff_m(), | ||
2302 | therm_gate_ctrl_idle_holdoff_on_f()); | ||
2303 | break; | ||
2304 | case ELCG_STOP: | ||
2305 | gate_ctrl = set_field(gate_ctrl, | ||
2306 | therm_gate_ctrl_eng_clk_m(), | ||
2307 | therm_gate_ctrl_eng_clk_stop_f()); | ||
2308 | break; | ||
2309 | case ELCG_AUTO: | ||
2310 | gate_ctrl = set_field(gate_ctrl, | ||
2311 | therm_gate_ctrl_eng_clk_m(), | ||
2312 | therm_gate_ctrl_eng_clk_auto_f()); | ||
2313 | break; | ||
2314 | default: | ||
2315 | nvgpu_err(g, "invalid elcg mode %d", mode); | ||
2316 | } | ||
2317 | |||
2318 | gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl); | ||
2319 | } | ||
2320 | |||
2321 | void gr_gv11b_load_tpc_mask(struct gk20a *g) | ||
2322 | { | ||
2323 | u32 pes_tpc_mask = 0, fuse_tpc_mask; | ||
2324 | u32 gpc, pes, val; | ||
2325 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, | ||
2326 | GPU_LIT_NUM_TPC_PER_GPC); | ||
2327 | |||
2328 | /* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */ | ||
2329 | for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { | ||
2330 | for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) { | ||
2331 | pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] << | ||
2332 | num_tpc_per_gpc * gpc; | ||
2333 | } | ||
2334 | } | ||
2335 | |||
2336 | gk20a_dbg_info("pes_tpc_mask %u\n", pes_tpc_mask); | ||
2337 | fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, gpc); | ||
2338 | if (g->tpc_fs_mask_user && | ||
2339 | g->tpc_fs_mask_user != fuse_tpc_mask && | ||
2340 | fuse_tpc_mask == (0x1U << g->gr.max_tpc_count) - 1U) { | ||
2341 | val = g->tpc_fs_mask_user; | ||
2342 | val &= (0x1U << g->gr.max_tpc_count) - 1U; | ||
2343 | val = (0x1U << hweight32(val)) - 1U; | ||
2344 | gk20a_writel(g, gr_fe_tpc_fs_r(0), val); | ||
2345 | } else { | ||
2346 | gk20a_writel(g, gr_fe_tpc_fs_r(0), pes_tpc_mask); | ||
2347 | } | ||
2348 | |||
2349 | } | ||
2350 | |||
2351 | void gr_gv11b_set_preemption_buffer_va(struct gk20a *g, | ||
2352 | struct nvgpu_mem *mem, u64 gpu_va) | ||
2353 | { | ||
2354 | u32 addr_lo, addr_hi; | ||
2355 | |||
2356 | addr_lo = u64_lo32(gpu_va); | ||
2357 | addr_hi = u64_hi32(gpu_va); | ||
2358 | |||
2359 | nvgpu_mem_wr(g, mem, | ||
2360 | ctxsw_prog_main_image_full_preemption_ptr_o(), addr_lo); | ||
2361 | nvgpu_mem_wr(g, mem, | ||
2362 | ctxsw_prog_main_image_full_preemption_ptr_hi_o(), addr_hi); | ||
2363 | |||
2364 | nvgpu_mem_wr(g, mem, | ||
2365 | ctxsw_prog_main_image_full_preemption_ptr_veid0_o(), addr_lo); | ||
2366 | nvgpu_mem_wr(g, mem, | ||
2367 | ctxsw_prog_main_image_full_preemption_ptr_veid0_hi_o(), | ||
2368 | addr_hi); | ||
2369 | |||
2370 | } | ||
2371 | |||
2372 | int gr_gv11b_init_fs_state(struct gk20a *g) | ||
2373 | { | ||
2374 | u32 data; | ||
2375 | |||
2376 | gk20a_dbg_fn(""); | ||
2377 | |||
2378 | data = gk20a_readl(g, gr_gpcs_tpcs_sm_texio_control_r()); | ||
2379 | data = set_field(data, gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(), | ||
2380 | gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f()); | ||
2381 | gk20a_writel(g, gr_gpcs_tpcs_sm_texio_control_r(), data); | ||
2382 | |||
2383 | data = gk20a_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r()); | ||
2384 | data = set_field(data, gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_m(), | ||
2385 | gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f()); | ||
2386 | gk20a_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data); | ||
2387 | |||
2388 | if (g->gr.t18x.fecs_feature_override_ecc_val != 0) { | ||
2389 | gk20a_writel(g, | ||
2390 | gr_fecs_feature_override_ecc_r(), | ||
2391 | g->gr.t18x.fecs_feature_override_ecc_val); | ||
2392 | } | ||
2393 | |||
2394 | return gr_gm20b_init_fs_state(g); | ||
2395 | } | ||
2396 | |||
2397 | void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc, | ||
2398 | u32 *esr_sm_sel) | ||
2399 | { | ||
2400 | u32 reg_val; | ||
2401 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); | ||
2402 | |||
2403 | reg_val = gk20a_readl(g, gr_gpc0_tpc0_sm_tpc_esr_sm_sel_r() + offset); | ||
2404 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2405 | "sm tpc esr sm sel reg val: 0x%x", reg_val); | ||
2406 | *esr_sm_sel = 0; | ||
2407 | if (gr_gpc0_tpc0_sm_tpc_esr_sm_sel_sm0_error_v(reg_val)) | ||
2408 | *esr_sm_sel = 1; | ||
2409 | if (gr_gpc0_tpc0_sm_tpc_esr_sm_sel_sm1_error_v(reg_val)) | ||
2410 | *esr_sm_sel |= 1 << 1; | ||
2411 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2412 | "esr_sm_sel bitmask: 0x%x", *esr_sm_sel); | ||
2413 | } | ||
2414 | |||
2415 | int gv11b_gr_sm_trigger_suspend(struct gk20a *g) | ||
2416 | { | ||
2417 | u32 dbgr_control0; | ||
2418 | |||
2419 | /* assert stop trigger. uniformity assumption: all SMs will have | ||
2420 | * the same state in dbg_control0. | ||
2421 | */ | ||
2422 | dbgr_control0 = | ||
2423 | gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r()); | ||
2424 | dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f(); | ||
2425 | |||
2426 | /* broadcast write */ | ||
2427 | gk20a_writel(g, | ||
2428 | gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0); | ||
2429 | |||
2430 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | ||
2431 | "stop trigger enable: broadcast dbgr_control0: 0x%x ", | ||
2432 | dbgr_control0); | ||
2433 | |||
2434 | return 0; | ||
2435 | } | ||
2436 | |||
2437 | void gv11b_gr_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state) | ||
2438 | { | ||
2439 | /* Check if we have at least one valid warp and collect the | ||
2440 | * valid/paused/trapped warp masks for each SM. | ||
2441 | */ | ||
2442 | struct gr_gk20a *gr = &g->gr; | ||
2443 | u32 gpc, tpc, sm, sm_id; | ||
2444 | u32 offset; | ||
2445 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | ||
2446 | |||
2447 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
2448 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
2449 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
2450 | sm = g->gr.sm_to_cluster[sm_id].sm_index; | ||
2451 | |||
2452 | offset = gk20a_gr_gpc_offset(g, gpc) + | ||
2453 | gk20a_gr_tpc_offset(g, tpc) + | ||
2454 | gv11b_gr_sm_offset(g, sm); | ||
2455 | |||
2456 | /* 64 bit read */ | ||
2457 | warps_valid = (u64)gk20a_readl(g, | ||
2458 | gr_gpc0_tpc0_sm0_warp_valid_mask_1_r() + | ||
2459 | offset) << 32; | ||
2460 | warps_valid |= gk20a_readl(g, | ||
2461 | gr_gpc0_tpc0_sm0_warp_valid_mask_0_r() + | ||
2462 | offset); | ||
2463 | |||
2464 | /* 64 bit read */ | ||
2465 | warps_paused = (u64)gk20a_readl(g, | ||
2466 | gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_1_r() + | ||
2467 | offset) << 32; | ||
2468 | warps_paused |= gk20a_readl(g, | ||
2469 | gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_0_r() + | ||
2470 | offset); | ||
2471 | |||
2472 | /* 64 bit read */ | ||
2473 | warps_trapped = (u64)gk20a_readl(g, | ||
2474 | gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_1_r() + | ||
2475 | offset) << 32; | ||
2476 | warps_trapped |= gk20a_readl(g, | ||
2477 | gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_0_r() + | ||
2478 | offset); | ||
2479 | |||
2480 | w_state[sm_id].valid_warps[0] = warps_valid; | ||
2481 | w_state[sm_id].trapped_warps[0] = warps_trapped; | ||
2482 | w_state[sm_id].paused_warps[0] = warps_paused; | ||
2483 | } | ||
2484 | |||
2485 | |||
2486 | /* Only for debugging purposes */ | ||
2487 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
2488 | gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n", | ||
2489 | sm_id, w_state[sm_id].valid_warps[0]); | ||
2490 | gk20a_dbg_fn("w_state[%d].valid_warps[1]: %llx\n", | ||
2491 | sm_id, w_state[sm_id].valid_warps[1]); | ||
2492 | |||
2493 | gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n", | ||
2494 | sm_id, w_state[sm_id].trapped_warps[0]); | ||
2495 | gk20a_dbg_fn("w_state[%d].trapped_warps[1]: %llx\n", | ||
2496 | sm_id, w_state[sm_id].trapped_warps[1]); | ||
2497 | |||
2498 | gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n", | ||
2499 | sm_id, w_state[sm_id].paused_warps[0]); | ||
2500 | gk20a_dbg_fn("w_state[%d].paused_warps[1]: %llx\n", | ||
2501 | sm_id, w_state[sm_id].paused_warps[1]); | ||
2502 | } | ||
2503 | } | ||
2504 | |||
2505 | int gv11b_gr_update_sm_error_state(struct gk20a *g, | ||
2506 | struct channel_gk20a *ch, u32 sm_id, | ||
2507 | struct nvgpu_gr_sm_error_state *sm_error_state) | ||
2508 | { | ||
2509 | u32 gpc, tpc, sm, offset; | ||
2510 | struct gr_gk20a *gr = &g->gr; | ||
2511 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | ||
2512 | int err = 0; | ||
2513 | |||
2514 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
2515 | |||
2516 | gr->sm_error_states[sm_id].hww_global_esr = | ||
2517 | sm_error_state->hww_global_esr; | ||
2518 | gr->sm_error_states[sm_id].hww_warp_esr = | ||
2519 | sm_error_state->hww_warp_esr; | ||
2520 | gr->sm_error_states[sm_id].hww_warp_esr_pc = | ||
2521 | sm_error_state->hww_warp_esr_pc; | ||
2522 | gr->sm_error_states[sm_id].hww_global_esr_report_mask = | ||
2523 | sm_error_state->hww_global_esr_report_mask; | ||
2524 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask = | ||
2525 | sm_error_state->hww_warp_esr_report_mask; | ||
2526 | |||
2527 | err = gr_gk20a_disable_ctxsw(g); | ||
2528 | if (err) { | ||
2529 | nvgpu_err(g, "unable to stop gr ctxsw"); | ||
2530 | goto fail; | ||
2531 | } | ||
2532 | |||
2533 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
2534 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
2535 | sm = g->gr.sm_to_cluster[sm_id].sm_index; | ||
2536 | |||
2537 | offset = gk20a_gr_gpc_offset(g, gpc) + | ||
2538 | gk20a_gr_tpc_offset(g, tpc) + | ||
2539 | gv11b_gr_sm_offset(g, sm); | ||
2540 | |||
2541 | if (gk20a_is_channel_ctx_resident(ch)) { | ||
2542 | gk20a_writel(g, | ||
2543 | gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset, | ||
2544 | gr->sm_error_states[sm_id].hww_global_esr); | ||
2545 | gk20a_writel(g, | ||
2546 | gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, | ||
2547 | gr->sm_error_states[sm_id].hww_warp_esr); | ||
2548 | gk20a_writel(g, | ||
2549 | gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r() + offset, | ||
2550 | gr->sm_error_states[sm_id].hww_warp_esr_pc); | ||
2551 | gk20a_writel(g, | ||
2552 | gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r() + offset, | ||
2553 | gr->sm_error_states[sm_id].hww_global_esr_report_mask); | ||
2554 | gk20a_writel(g, | ||
2555 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset, | ||
2556 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask); | ||
2557 | } else { | ||
2558 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false); | ||
2559 | if (err) | ||
2560 | goto enable_ctxsw; | ||
2561 | |||
2562 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
2563 | gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r() + | ||
2564 | offset, | ||
2565 | gr->sm_error_states[sm_id].hww_global_esr_report_mask, | ||
2566 | true); | ||
2567 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
2568 | gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r() + | ||
2569 | offset, | ||
2570 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask, | ||
2571 | true); | ||
2572 | |||
2573 | gr_gk20a_ctx_patch_write_end(g, ch_ctx, false); | ||
2574 | } | ||
2575 | |||
2576 | enable_ctxsw: | ||
2577 | err = gr_gk20a_enable_ctxsw(g); | ||
2578 | |||
2579 | fail: | ||
2580 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
2581 | return err; | ||
2582 | } | ||
2583 | |||
2584 | int gv11b_gr_set_sm_debug_mode(struct gk20a *g, | ||
2585 | struct channel_gk20a *ch, u64 sms, bool enable) | ||
2586 | { | ||
2587 | struct nvgpu_dbg_gpu_reg_op *ops; | ||
2588 | unsigned int i = 0, sm_id; | ||
2589 | int err; | ||
2590 | |||
2591 | ops = nvgpu_kcalloc(g, g->gr.no_of_sm, sizeof(*ops)); | ||
2592 | if (!ops) | ||
2593 | return -ENOMEM; | ||
2594 | for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { | ||
2595 | u32 gpc, tpc, sm; | ||
2596 | u32 reg_offset, reg_mask, reg_val; | ||
2597 | |||
2598 | if (!(sms & (1ULL << sm_id))) | ||
2599 | continue; | ||
2600 | |||
2601 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
2602 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
2603 | sm = g->gr.sm_to_cluster[sm_id].sm_index; | ||
2604 | |||
2605 | reg_offset = gk20a_gr_gpc_offset(g, gpc) + | ||
2606 | gk20a_gr_tpc_offset(g, tpc) + | ||
2607 | gv11b_gr_sm_offset(g, sm); | ||
2608 | |||
2609 | ops[i].op = REGOP(WRITE_32); | ||
2610 | ops[i].type = REGOP(TYPE_GR_CTX); | ||
2611 | ops[i].offset = gr_gpc0_tpc0_sm0_dbgr_control0_r() + reg_offset; | ||
2612 | |||
2613 | reg_mask = 0; | ||
2614 | reg_val = 0; | ||
2615 | if (enable) { | ||
2616 | nvgpu_log(g, gpu_dbg_gpu_dbg, | ||
2617 | "SM:%d debuggger mode ON", sm); | ||
2618 | reg_mask |= | ||
2619 | gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_m(); | ||
2620 | reg_val |= | ||
2621 | gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_on_f(); | ||
2622 | } else { | ||
2623 | nvgpu_log(g, gpu_dbg_gpu_dbg, | ||
2624 | "SM:%d debuggger mode Off", sm); | ||
2625 | reg_mask |= | ||
2626 | gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_m(); | ||
2627 | reg_val |= | ||
2628 | gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_off_f(); | ||
2629 | } | ||
2630 | |||
2631 | ops[i].and_n_mask_lo = reg_mask; | ||
2632 | ops[i].value_lo = reg_val; | ||
2633 | i++; | ||
2634 | } | ||
2635 | |||
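/* apply all collected reg ops in one call; they are written directly or patched into the gr ctx depending on channel residency */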
2636 | err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0); | ||
2637 | if (err) | ||
2638 | nvgpu_err(g, "Failed to access register"); | ||
2639 | nvgpu_kfree(g, ops); | ||
2640 | return err; | ||
2641 | } | ||
2642 | |||
2643 | int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) | ||
2644 | { | ||
2645 | int sm_id; | ||
2646 | struct gr_gk20a *gr = &g->gr; | ||
2647 | u32 offset, sm, sm_per_tpc; | ||
2648 | u32 gpc_tpc_offset; | ||
2649 | |||
2650 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
2651 | |||
2652 | sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); | ||
2653 | gpc_tpc_offset = gk20a_gr_gpc_offset(g, gpc) + | ||
2654 | gk20a_gr_tpc_offset(g, tpc); | ||
2655 | |||
2656 | sm_id = gr_gpc0_tpc0_sm_cfg_tpc_id_v(gk20a_readl(g, | ||
2657 | gr_gpc0_tpc0_sm_cfg_r() + gpc_tpc_offset)); | ||
2658 | |||
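/* sm_cfg provides the logical id used to index sm_error_states; modulo sm_per_tpc gives the SM index within this TPC */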
2659 | sm = sm_id % sm_per_tpc; | ||
2660 | |||
2661 | offset = gpc_tpc_offset + gv11b_gr_sm_offset(g, sm); | ||
2662 | |||
2663 | gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g, | ||
2664 | gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset); | ||
2665 | |||
2666 | gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g, | ||
2667 | gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset); | ||
2668 | |||
2669 | gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g, | ||
2670 | gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r() + offset); | ||
2671 | |||
2672 | gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g, | ||
2673 | gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r() + offset); | ||
2674 | |||
2675 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g, | ||
2676 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset); | ||
2677 | |||
2678 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
2679 | |||
2680 | return 0; | ||
2681 | } | ||
2682 | |||
2683 | void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g) | ||
2684 | { | ||
2685 | |||
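/* gr_gpcs_tpcs_sms_* are broadcast registers, so each write below applies to every SM at once */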
2686 | /* clear hww */ | ||
2687 | gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_r(), 0xffffffff); | ||
2688 | gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_r(), 0xffffffff); | ||
2689 | |||
2690 | /* setup sm warp esr report masks */ | ||
2691 | gk20a_writel(g, gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r(), | ||
2692 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_stack_error_report_f() | | ||
2693 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_api_stack_error_report_f() | | ||
2694 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_pc_wrap_report_f() | | ||
2695 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_pc_report_f() | | ||
2696 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_pc_overflow_report_f() | | ||
2697 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_reg_report_f() | | ||
2698 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() | | ||
2699 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_illegal_instr_param_report_f() | | ||
2700 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_reg_report_f() | | ||
2701 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_addr_report_f() | | ||
2702 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_addr_report_f() | | ||
2703 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_addr_space_report_f() | | ||
2704 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() | | ||
2705 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_stack_overflow_report_f() | | ||
2706 | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_fault_report_f()); | ||
2707 | |||
2708 | /* setup sm global esr report mask. vat_alarm_report is not enabled */ | ||
2709 | gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r(), | ||
2710 | gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_multiple_warp_errors_report_f()); | ||
2711 | } | ||
2712 | |||
2713 | bool gv11b_gr_sm_debugger_attached(struct gk20a *g) | ||
2714 | { | ||
2715 | u32 debugger_mode; | ||
2716 | u32 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r()); | ||
2717 | |||
2718 | /* check if sm debugger is attached. | ||
2719 | * assumption: all SMs will have debug mode enabled/disabled | ||
2720 | * uniformly. | ||
2721 | */ | ||
2722 | debugger_mode = | ||
2723 | gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_v(dbgr_control0); | ||
2724 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | ||
2725 | "SM Debugger Mode: %d", debugger_mode); | ||
2726 | if (debugger_mode == | ||
2727 | gr_gpc0_tpc0_sm0_dbgr_control0_debugger_mode_on_v()) | ||
2728 | return true; | ||
2729 | |||
2730 | return false; | ||
2731 | } | ||
2732 | |||
2733 | void gv11b_gr_suspend_single_sm(struct gk20a *g, | ||
2734 | u32 gpc, u32 tpc, u32 sm, | ||
2735 | u32 global_esr_mask, bool check_errors) | ||
2736 | { | ||
2737 | int err; | ||
2738 | u32 dbgr_control0; | ||
2739 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + | ||
2740 | gk20a_gr_tpc_offset(g, tpc) + | ||
2741 | gv11b_gr_sm_offset(g, sm); | ||
2742 | |||
2743 | /* if an SM debugger isn't attached, skip suspend */ | ||
2744 | if (!g->ops.gr.sm_debugger_attached(g)) { | ||
2745 | nvgpu_err(g, | ||
2746 | "SM debugger not attached, skipping suspend!"); | ||
2747 | return; | ||
2748 | } | ||
2749 | |||
2750 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2751 | "suspending gpc:%d, tpc:%d, sm%d", gpc, tpc, sm); | ||
2752 | |||
2753 | /* assert stop trigger. */ | ||
2754 | dbgr_control0 = gk20a_readl(g, | ||
2755 | gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); | ||
2756 | dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f(); | ||
2757 | gk20a_writel(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset, | ||
2758 | dbgr_control0); | ||
2759 | |||
2760 | err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, | ||
2761 | global_esr_mask, check_errors); | ||
2762 | if (err) { | ||
2763 | nvgpu_err(g, | ||
2764 | "SuspendSm failed"); | ||
2765 | return; | ||
2766 | } | ||
2767 | } | ||
2768 | |||
2769 | void gv11b_gr_suspend_all_sms(struct gk20a *g, | ||
2770 | u32 global_esr_mask, bool check_errors) | ||
2771 | { | ||
2772 | struct gr_gk20a *gr = &g->gr; | ||
2773 | u32 gpc, tpc, sm; | ||
2774 | int err; | ||
2775 | u32 dbgr_control0; | ||
2776 | u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); | ||
2777 | |||
2778 | /* if an SM debugger isn't attached, skip suspend */ | ||
2779 | if (!g->ops.gr.sm_debugger_attached(g)) { | ||
2780 | nvgpu_err(g, | ||
2781 | "SM debugger not attached, skipping suspend!"); | ||
2782 | return; | ||
2783 | } | ||
2784 | |||
2785 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "suspending all sms"); | ||
2786 | |||
2787 | /* assert stop trigger. uniformity assumption: all SMs will have | ||
2788 | * the same state in dbg_control0. | ||
2789 | */ | ||
2790 | dbgr_control0 = | ||
2791 | gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r()); | ||
2792 | dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f(); | ||
2793 | |||
2794 | /* broadcast write */ | ||
2795 | gk20a_writel(g, | ||
2796 | gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0); | ||
2797 | |||
2798 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { | ||
2799 | for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { | ||
2800 | for (sm = 0; sm < sm_per_tpc; sm++) { | ||
2801 | err = g->ops.gr.wait_for_sm_lock_down(g, | ||
2802 | gpc, tpc, sm, | ||
2803 | global_esr_mask, check_errors); | ||
2804 | if (err) { | ||
2805 | nvgpu_err(g, | ||
2806 | "SuspendAllSms failed"); | ||
2807 | return; | ||
2808 | } | ||
2809 | } | ||
2810 | } | ||
2811 | } | ||
2812 | } | ||
2813 | |||
2814 | void gv11b_gr_resume_single_sm(struct gk20a *g, | ||
2815 | u32 gpc, u32 tpc, u32 sm) | ||
2816 | { | ||
2817 | u32 dbgr_control0, dbgr_status0; | ||
2818 | u32 offset; | ||
2819 | /* | ||
2820 | * The following requires some clarification. Despite the fact that both | ||
2821 | * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their | ||
2822 | * names, only one is actually a trigger, and that is the STOP_TRIGGER. | ||
2823 | * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to | ||
2824 | * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0 | ||
2825 | * (_DISABLE) as well. | ||
2826 | * | ||
2827 | * Advice from the arch group: Disable the stop trigger first, as a | ||
2828 | * separate operation, in order to ensure that the trigger has taken | ||
2829 | * effect, before enabling the run trigger. | ||
2830 | */ | ||
2831 | |||
2832 | offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) + | ||
2833 | gv11b_gr_sm_offset(g, sm); | ||
2834 | |||
2835 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2836 | "resuming gpc:%d, tpc:%d, sm%d", gpc, tpc, sm); | ||
2837 | dbgr_control0 = gk20a_readl(g, | ||
2838 | gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); | ||
2839 | dbgr_status0 = gk20a_readl(g, | ||
2840 | gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset); | ||
2841 | |||
2842 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2843 | "before stop trigger disable: " | ||
2844 | "dbgr_control0 = 0x%x dbgr_status0: 0x%x", | ||
2845 | dbgr_control0, dbgr_status0); | ||
2846 | |||
2847 | /* De-assert stop trigger */ | ||
2848 | dbgr_control0 = set_field(dbgr_control0, | ||
2849 | gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_m(), | ||
2850 | gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_disable_f()); | ||
2851 | gk20a_writel(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + | ||
2852 | offset, dbgr_control0); | ||
2853 | |||
2854 | dbgr_control0 = gk20a_readl(g, | ||
2855 | gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); | ||
2856 | dbgr_status0 = gk20a_readl(g, | ||
2857 | gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset); | ||
2858 | |||
2859 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2860 | "before run trigger: " | ||
2861 | "dbgr_control0 = 0x%x dbgr_status0: 0x%x", | ||
2862 | dbgr_control0, dbgr_status0); | ||
2863 | /* Run trigger */ | ||
2864 | dbgr_control0 |= | ||
2865 | gr_gpc0_tpc0_sm0_dbgr_control0_run_trigger_task_f(); | ||
2866 | gk20a_writel(g, | ||
2867 | gr_gpc0_tpc0_sm0_dbgr_control0_r() + | ||
2868 | offset, dbgr_control0); | ||
2869 | |||
2870 | dbgr_control0 = gk20a_readl(g, | ||
2871 | gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); | ||
2872 | dbgr_status0 = gk20a_readl(g, | ||
2873 | gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset); | ||
2874 | /* run trigger is not sticky bit. SM clears it immediately */ | ||
2875 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2876 | "after run trigger: " | ||
2877 | "dbgr_control0 = 0x%x dbgr_status0: 0x%x", | ||
2878 | dbgr_control0, dbgr_status0); | ||
2879 | |||
2880 | } | ||
2881 | |||
2882 | void gv11b_gr_resume_all_sms(struct gk20a *g) | ||
2883 | { | ||
2884 | u32 dbgr_control0, dbgr_status0; | ||
2885 | /* | ||
2886 | * The following requires some clarification. Despite the fact that both | ||
2887 | * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their | ||
2888 | * names, only one is actually a trigger, and that is the STOP_TRIGGER. | ||
2889 | * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to | ||
2890 | * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0 | ||
2891 | * (_DISABLE) as well. | ||
2892 | * | ||
2893 | * Advice from the arch group: Disable the stop trigger first, as a | ||
2894 | * separate operation, in order to ensure that the trigger has taken | ||
2895 | * effect, before enabling the run trigger. | ||
2896 | */ | ||
2897 | |||
2898 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "resuming all sms"); | ||
2899 | |||
2900 | /* Read from unicast registers */ | ||
2901 | dbgr_control0 = | ||
2902 | gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r()); | ||
2903 | dbgr_status0 = | ||
2904 | gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_status0_r()); | ||
2905 | |||
2906 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2907 | "before stop trigger disable: " | ||
2908 | "dbgr_control0 = 0x%x dbgr_status0: 0x%x", | ||
2909 | dbgr_control0, dbgr_status0); | ||
2910 | |||
2911 | dbgr_control0 = set_field(dbgr_control0, | ||
2912 | gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_m(), | ||
2913 | gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_disable_f()); | ||
2914 | /* Write to broadcast registers */ | ||
2915 | gk20a_writel(g, | ||
2916 | gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0); | ||
2917 | |||
2918 | /* Read from unicast registers */ | ||
2919 | dbgr_control0 = | ||
2920 | gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r()); | ||
2921 | dbgr_status0 = | ||
2922 | gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_status0_r()); | ||
2923 | |||
2924 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2925 | "before run trigger: " | ||
2926 | "dbgr_control0 = 0x%x dbgr_status0: 0x%x", | ||
2927 | dbgr_control0, dbgr_status0); | ||
2928 | /* Run trigger */ | ||
2929 | dbgr_control0 |= | ||
2930 | gr_gpc0_tpc0_sm0_dbgr_control0_run_trigger_task_f(); | ||
2931 | /* Write to broadcast registers */ | ||
2932 | gk20a_writel(g, | ||
2933 | gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0); | ||
2934 | |||
2935 | /* Read from unicast registers */ | ||
2936 | dbgr_control0 = | ||
2937 | gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r()); | ||
2938 | dbgr_status0 = | ||
2939 | gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_status0_r()); | ||
2940 | /* run trigger is not sticky bit. SM clears it immediately */ | ||
2941 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2942 | "after run trigger: " | ||
2943 | "dbgr_control0 = 0x%x dbgr_status0: 0x%x", | ||
2944 | dbgr_control0, dbgr_status0); | ||
2945 | } | ||
2946 | |||
2947 | int gv11b_gr_resume_from_pause(struct gk20a *g) | ||
2948 | { | ||
2949 | int err = 0; | ||
2950 | u32 reg_val; | ||
2951 | |||
2952 | /* Clear the pause mask to tell the GPU we want to resume everyone */ | ||
2953 | gk20a_writel(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_0_r(), 0); | ||
2954 | |||
2955 | /* explicitly re-enable forwarding of SM interrupts upon any resume */ | ||
2956 | reg_val = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r()); | ||
2957 | reg_val |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(); | ||
2958 | |||
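/* value was read from the unicast TPC0 copy; write it back through the broadcast register so all TPCs are updated */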
2959 | gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), reg_val); | ||
2960 | |||
2961 | g->ops.gr.resume_all_sms(g); | ||
2962 | |||
2963 | return err; | ||
2964 | } | ||
2965 | |||
2966 | u32 gv11b_gr_get_sm_hww_warp_esr(struct gk20a *g, | ||
2967 | u32 gpc, u32 tpc, u32 sm) | ||
2968 | { | ||
2969 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + | ||
2970 | gk20a_gr_tpc_offset(g, tpc) + | ||
2971 | gv11b_gr_sm_offset(g, sm); | ||
2972 | |||
2973 | u32 hww_warp_esr = gk20a_readl(g, | ||
2974 | gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset); | ||
2975 | return hww_warp_esr; | ||
2976 | } | ||
2977 | |||
2978 | u32 gv11b_gr_get_sm_hww_global_esr(struct gk20a *g, | ||
2979 | u32 gpc, u32 tpc, u32 sm) | ||
2980 | { | ||
2981 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + | ||
2982 | gk20a_gr_tpc_offset(g, tpc) + | ||
2983 | gv11b_gr_sm_offset(g, sm); | ||
2984 | |||
2985 | u32 hww_global_esr = gk20a_readl(g, | ||
2986 | gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset); | ||
2987 | |||
2988 | return hww_global_esr; | ||
2989 | } | ||
2990 | |||
2991 | u32 gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g) | ||
2992 | { | ||
2993 | /* | ||
2994 | * These three interrupts don't require locking down the SM. They can | ||
2995 | * be handled by usermode clients as they aren't fatal. Additionally, | ||
2996 | * usermode clients may wish to allow some warps to execute while others | ||
2997 | * are at breakpoints, as opposed to fatal errors where all warps should | ||
2998 | * halt. | ||
2999 | */ | ||
3000 | u32 global_esr_mask = | ||
3001 | gr_gpc0_tpc0_sm0_hww_global_esr_bpt_int_pending_f() | | ||
3002 | gr_gpc0_tpc0_sm0_hww_global_esr_bpt_pause_pending_f() | | ||
3003 | gr_gpc0_tpc0_sm0_hww_global_esr_single_step_complete_pending_f(); | ||
3004 | |||
3005 | return global_esr_mask; | ||
3006 | } | ||
3007 | |||
3008 | static void gv11b_gr_sm_dump_warp_bpt_pause_trap_mask_regs(struct gk20a *g, | ||
3009 | u32 offset, bool timeout) | ||
3010 | { | ||
3011 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | ||
3012 | u32 dbgr_control0 = gk20a_readl(g, | ||
3013 | gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); | ||
3014 | u32 dbgr_status0 = gk20a_readl(g, | ||
3015 | gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset); | ||
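/* each warp mask is 64 bits wide, split across a _1 (upper word) and _0 (lower word) register pair */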
3016 | /* 64 bit read */ | ||
3017 | warps_valid = | ||
3018 | (u64)gk20a_readl(g, gr_gpc0_tpc0_sm0_warp_valid_mask_1_r() + | ||
3019 | offset) << 32; | ||
3020 | warps_valid |= gk20a_readl(g, | ||
3021 | gr_gpc0_tpc0_sm0_warp_valid_mask_0_r() + offset); | ||
3022 | |||
3023 | /* 64 bit read */ | ||
3024 | warps_paused = | ||
3025 | (u64)gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_1_r() + | ||
3026 | offset) << 32; | ||
3027 | warps_paused |= gk20a_readl(g, | ||
3028 | gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_0_r() + offset); | ||
3029 | |||
3030 | /* 64 bit read */ | ||
3031 | warps_trapped = | ||
3032 | (u64)gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_1_r() + | ||
3033 | offset) << 32; | ||
3034 | warps_trapped |= gk20a_readl(g, | ||
3035 | gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_0_r() + offset); | ||
3036 | if (timeout) | ||
3037 | nvgpu_err(g, | ||
3038 | "STATUS0=0x%x CONTROL0=0x%x VALID_MASK=0x%llx " | ||
3039 | "PAUSE_MASK=0x%llx TRAP_MASK=0x%llx\n", | ||
3040 | dbgr_status0, dbgr_control0, warps_valid, | ||
3041 | warps_paused, warps_trapped); | ||
3042 | else | ||
3043 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | ||
3044 | "STATUS0=0x%x CONTROL0=0x%x VALID_MASK=0x%llx " | ||
3045 | "PAUSE_MASK=0x%llx TRAP_MASK=0x%llx\n", | ||
3046 | dbgr_status0, dbgr_control0, warps_valid, | ||
3047 | warps_paused, warps_trapped); | ||
3048 | } | ||
3049 | |||
3050 | int gv11b_gr_wait_for_sm_lock_down(struct gk20a *g, | ||
3051 | u32 gpc, u32 tpc, u32 sm, | ||
3052 | u32 global_esr_mask, bool check_errors) | ||
3053 | { | ||
3054 | bool locked_down; | ||
3055 | bool no_error_pending; | ||
3056 | u32 delay = GR_IDLE_CHECK_DEFAULT; | ||
3057 | bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g); | ||
3058 | u32 dbgr_status0 = 0; | ||
3059 | u32 warp_esr, global_esr; | ||
3060 | struct nvgpu_timeout timeout; | ||
3061 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + | ||
3062 | gk20a_gr_tpc_offset(g, tpc) + | ||
3063 | gv11b_gr_sm_offset(g, sm); | ||
3064 | |||
3065 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | ||
3066 | "GPC%d TPC%d: locking down SM%d", gpc, tpc, sm); | ||
3067 | |||
3068 | nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), | ||
3069 | NVGPU_TIMER_CPU_TIMER); | ||
3070 | |||
3071 | /* wait for the sm to lock down */ | ||
3072 | do { | ||
3073 | global_esr = g->ops.gr.get_sm_hww_global_esr(g, gpc, tpc, sm); | ||
3074 | dbgr_status0 = gk20a_readl(g, | ||
3075 | gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset); | ||
3076 | |||
3077 | warp_esr = g->ops.gr.get_sm_hww_warp_esr(g, gpc, tpc, sm); | ||
3078 | |||
3079 | locked_down = | ||
3080 | (gr_gpc0_tpc0_sm0_dbgr_status0_locked_down_v(dbgr_status0) == | ||
3081 | gr_gpc0_tpc0_sm0_dbgr_status0_locked_down_true_v()); | ||
3082 | no_error_pending = | ||
3083 | check_errors && | ||
3084 | (gr_gpc0_tpc0_sm0_hww_warp_esr_error_v(warp_esr) == | ||
3085 | gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_v()) && | ||
3086 | ((global_esr & global_esr_mask) == 0); | ||
3087 | |||
3088 | if (locked_down) { | ||
3089 | /* | ||
3090 | * if SM reports locked down, it means that SM is idle and | ||
3091 | * trapped and also that one of these conditions is true | ||
3092 | * 1) sm is nonempty and all valid warps are paused | ||
3093 | * 2) sm is empty and held in trapped state due to stop trigger | ||
3094 | * 3) sm is nonempty and some warps are not paused, but are | ||
3095 | * instead held at RTT due to an "active" stop trigger | ||
3096 | * Check for Paused warp mask != Valid | ||
3097 | * warp mask after SM reports it is locked down in order to | ||
3098 | * distinguish case 1 from case 3. When case 3 is detected, | ||
3099 | * it implies a misprogrammed trap handler code, as all warps | ||
3100 | * in the handler must promise to BPT.PAUSE instead of RTT | ||
3101 | * whenever SR64 read in trap mode indicates stop trigger | ||
3102 | * is asserted. | ||
3103 | */ | ||
3104 | gv11b_gr_sm_dump_warp_bpt_pause_trap_mask_regs(g, | ||
3105 | offset, false); | ||
3106 | } | ||
3107 | |||
3108 | if (locked_down || no_error_pending) { | ||
3109 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | ||
3110 | "GPC%d TPC%d: locked down SM%d", gpc, tpc, sm); | ||
3111 | return 0; | ||
3112 | } | ||
3113 | |||
3114 | /* if an mmu fault is pending and mmu debug mode is not | ||
3115 | * enabled, the sm will never lock down. | ||
3116 | */ | ||
3117 | if (!mmu_debug_mode_enabled && | ||
3118 | (g->ops.mm.mmu_fault_pending(g))) { | ||
3119 | nvgpu_err(g, | ||
3120 | "GPC%d TPC%d: mmu fault pending," | ||
3121 | " SM%d will never lock down!", gpc, tpc, sm); | ||
3122 | return -EFAULT; | ||
3123 | } | ||
3124 | |||
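/* poll with exponential backoff, doubling the delay up to GR_IDLE_CHECK_MAX */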
3125 | nvgpu_usleep_range(delay, delay * 2); | ||
3126 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | ||
3127 | } while (!nvgpu_timeout_expired(&timeout)); | ||
3128 | |||
3129 | nvgpu_err(g, "GPC%d TPC%d: timed out while trying to " | ||
3130 | "lock down SM%d", gpc, tpc, sm); | ||
3131 | gv11b_gr_sm_dump_warp_bpt_pause_trap_mask_regs(g, offset, true); | ||
3132 | |||
3133 | return -ETIMEDOUT; | ||
3134 | } | ||
3135 | |||
3136 | int gv11b_gr_lock_down_sm(struct gk20a *g, | ||
3137 | u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask, | ||
3138 | bool check_errors) | ||
3139 | { | ||
3140 | u32 dbgr_control0; | ||
3141 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) + | ||
3142 | gv11b_gr_sm_offset(g, sm); | ||
3143 | |||
3144 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | ||
3145 | "GPC%d TPC%d SM%d: assert stop trigger", gpc, tpc, sm); | ||
3146 | |||
3147 | /* assert stop trigger */ | ||
3148 | dbgr_control0 = | ||
3149 | gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); | ||
3150 | dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f(); | ||
3151 | gk20a_writel(g, | ||
3152 | gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset, dbgr_control0); | ||
3153 | |||
3154 | return g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, global_esr_mask, | ||
3155 | check_errors); | ||
3156 | } | ||
3157 | |||
3158 | void gv11b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, | ||
3159 | u32 global_esr) | ||
3160 | { | ||
3161 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) + | ||
3162 | gv11b_gr_sm_offset(g, sm); | ||
3163 | |||
3164 | gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset, | ||
3165 | global_esr); | ||
3166 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
3167 | "Cleared HWW global esr, current reg val: 0x%x", | ||
3168 | gk20a_readl(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + | ||
3169 | offset)); | ||
3170 | |||
3171 | gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0); | ||
3172 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
3173 | "Cleared HWW warp esr, current reg val: 0x%x", | ||
3174 | gk20a_readl(g, gr_gpc0_tpc0_sm0_hww_warp_esr_r() + | ||
3175 | offset)); | ||
3176 | } | ||
3177 | |||
3178 | int gr_gv11b_handle_tpc_mpc_exception(struct gk20a *g, | ||
3179 | u32 gpc, u32 tpc, bool *post_event) | ||
3180 | { | ||
3181 | u32 esr; | ||
3182 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); | ||
3183 | u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r() | ||
3184 | + offset); | ||
3185 | |||
3186 | if (!(tpc_exception & gr_gpc0_tpc0_tpccs_tpc_exception_mpc_m())) | ||
3187 | return 0; | ||
3188 | |||
3189 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, | ||
3190 | "GPC%d TPC%d MPC exception", gpc, tpc); | ||
3191 | |||
3192 | esr = gk20a_readl(g, gr_gpc0_tpc0_mpc_hww_esr_r() + offset); | ||
3193 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "mpc hww esr 0x%08x", esr); | ||
3194 | |||
3195 | esr = gk20a_readl(g, gr_gpc0_tpc0_mpc_hww_esr_info_r() + offset); | ||
3196 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, | ||
3197 | "mpc hww esr info: veid 0x%08x", | ||
3198 | gr_gpc0_tpc0_mpc_hww_esr_info_veid_v(esr)); | ||
3199 | |||
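/* writing the reset trigger clears the latched MPC ESR state */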
3200 | gk20a_writel(g, gr_gpc0_tpc0_mpc_hww_esr_r() + offset, | ||
3201 | gr_gpc0_tpc0_mpc_hww_esr_reset_trigger_f()); | ||
3202 | |||
3203 | return 0; | ||
3204 | } | ||
3205 | |||
3206 | static const u32 _num_ovr_perf_regs = 20; | ||
3207 | static u32 _ovr_perf_regs[20] = { 0, }; | ||
3208 | |||
3209 | void gv11b_gr_init_ovr_sm_dsm_perf(void) | ||
3210 | { | ||
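/* build the override table only once; a nonzero first entry means it is already populated */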
3211 | if (_ovr_perf_regs[0] != 0) | ||
3212 | return; | ||
3213 | |||
3214 | _ovr_perf_regs[0] = gr_egpc0_etpc0_sm_dsm_perf_counter_control_sel0_r(); | ||
3215 | _ovr_perf_regs[1] = gr_egpc0_etpc0_sm_dsm_perf_counter_control_sel1_r(); | ||
3216 | _ovr_perf_regs[2] = gr_egpc0_etpc0_sm_dsm_perf_counter_control0_r(); | ||
3217 | _ovr_perf_regs[3] = gr_egpc0_etpc0_sm_dsm_perf_counter_control1_r(); | ||
3218 | _ovr_perf_regs[4] = gr_egpc0_etpc0_sm_dsm_perf_counter_control2_r(); | ||
3219 | _ovr_perf_regs[5] = gr_egpc0_etpc0_sm_dsm_perf_counter_control3_r(); | ||
3220 | _ovr_perf_regs[6] = gr_egpc0_etpc0_sm_dsm_perf_counter_control4_r(); | ||
3221 | _ovr_perf_regs[7] = gr_egpc0_etpc0_sm_dsm_perf_counter_control5_r(); | ||
3222 | _ovr_perf_regs[8] = gr_egpc0_etpc0_sm_dsm_perf_counter0_control_r(); | ||
3223 | _ovr_perf_regs[9] = gr_egpc0_etpc0_sm_dsm_perf_counter1_control_r(); | ||
3224 | _ovr_perf_regs[10] = gr_egpc0_etpc0_sm_dsm_perf_counter2_control_r(); | ||
3225 | _ovr_perf_regs[11] = gr_egpc0_etpc0_sm_dsm_perf_counter3_control_r(); | ||
3226 | _ovr_perf_regs[12] = gr_egpc0_etpc0_sm_dsm_perf_counter4_control_r(); | ||
3227 | _ovr_perf_regs[13] = gr_egpc0_etpc0_sm_dsm_perf_counter5_control_r(); | ||
3228 | _ovr_perf_regs[14] = gr_egpc0_etpc0_sm_dsm_perf_counter6_control_r(); | ||
3229 | _ovr_perf_regs[15] = gr_egpc0_etpc0_sm_dsm_perf_counter7_control_r(); | ||
3230 | |||
3231 | _ovr_perf_regs[16] = gr_egpc0_etpc0_sm0_dsm_perf_counter4_r(); | ||
3232 | _ovr_perf_regs[17] = gr_egpc0_etpc0_sm0_dsm_perf_counter5_r(); | ||
3233 | _ovr_perf_regs[18] = gr_egpc0_etpc0_sm0_dsm_perf_counter6_r(); | ||
3234 | _ovr_perf_regs[19] = gr_egpc0_etpc0_sm0_dsm_perf_counter7_r(); | ||
3235 | } | ||
3236 | |||
3237 | /* Following are the blocks of registers that the ucode | ||
3238 | * stores in the extended region. | ||
3239 | */ | ||
3240 | /* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */ | ||
3241 | static const u32 _num_sm_dsm_perf_regs; | ||
3242 | /* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/ | ||
3243 | static const u32 _num_sm_dsm_perf_ctrl_regs = 2; | ||
3244 | static u32 *_sm_dsm_perf_regs; | ||
3245 | static u32 _sm_dsm_perf_ctrl_regs[2]; | ||
3246 | |||
3247 | void gv11b_gr_init_sm_dsm_reg_info(void) | ||
3248 | { | ||
3249 | if (_sm_dsm_perf_ctrl_regs[0] != 0) | ||
3250 | return; | ||
3251 | |||
3252 | _sm_dsm_perf_ctrl_regs[0] = | ||
3253 | gr_egpc0_etpc0_sm_dsm_perf_counter_control0_r(); | ||
3254 | _sm_dsm_perf_ctrl_regs[1] = | ||
3255 | gr_egpc0_etpc0_sm_dsm_perf_counter_control5_r(); | ||
3256 | } | ||
3257 | |||
3258 | void gv11b_gr_get_sm_dsm_perf_regs(struct gk20a *g, | ||
3259 | u32 *num_sm_dsm_perf_regs, | ||
3260 | u32 **sm_dsm_perf_regs, | ||
3261 | u32 *perf_register_stride) | ||
3262 | { | ||
3263 | *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs; | ||
3264 | *sm_dsm_perf_regs = _sm_dsm_perf_regs; | ||
3265 | *perf_register_stride = | ||
3266 | ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); | ||
3267 | } | ||
3268 | |||
3269 | void gv11b_gr_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | ||
3270 | u32 *num_sm_dsm_perf_ctrl_regs, | ||
3271 | u32 **sm_dsm_perf_ctrl_regs, | ||
3272 | u32 *ctrl_register_stride) | ||
3273 | { | ||
3274 | *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs; | ||
3275 | *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs; | ||
3276 | *ctrl_register_stride = | ||
3277 | ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); | ||
3278 | } | ||
3279 | |||
3280 | void gv11b_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs, | ||
3281 | u32 **ovr_perf_regs) | ||
3282 | { | ||
3283 | *num_ovr_perf_regs = _num_ovr_perf_regs; | ||
3284 | *ovr_perf_regs = _ovr_perf_regs; | ||
3285 | } | ||
3286 | |||
3287 | void gv11b_gr_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) | ||
3288 | { | ||
3289 | u32 reg_val; | ||
3290 | u32 quad_ctrl; | ||
3291 | u32 half_ctrl; | ||
3292 | u32 tpc, gpc; | ||
3293 | u32 gpc_tpc_addr; | ||
3294 | u32 gpc_tpc_stride; | ||
3295 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
3296 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
3297 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
3298 | |||
3299 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset); | ||
3300 | |||
3301 | gpc = pri_get_gpc_num(g, offset); | ||
3302 | gpc_tpc_addr = pri_gpccs_addr_mask(offset); | ||
3303 | tpc = g->ops.gr.get_tpc_num(g, gpc_tpc_addr); | ||
3304 | |||
3305 | quad_ctrl = quad & 0x1; /* first bit tells us quad */ | ||
3306 | half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */ | ||
3307 | |||
3308 | gpc_tpc_stride = gpc * gpc_stride + tpc * tpc_in_gpc_stride; | ||
3309 | gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride; | ||
3310 | |||
3311 | /* read from unicast reg */ | ||
3312 | reg_val = gk20a_readl(g, gpc_tpc_addr); | ||
3313 | reg_val = set_field(reg_val, | ||
3314 | gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(), | ||
3315 | gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(quad_ctrl)); | ||
3316 | |||
3317 | /* write to broadcast reg */ | ||
3318 | gk20a_writel(g, gr_gpcs_tpcs_sm_halfctl_ctrl_r(), reg_val); | ||
3319 | |||
3320 | gpc_tpc_addr = gr_gpc0_tpc0_sm_debug_sfe_control_r() + gpc_tpc_stride; | ||
3321 | reg_val = gk20a_readl(g, gpc_tpc_addr); | ||
3322 | reg_val = set_field(reg_val, | ||
3323 | gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(), | ||
3324 | gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(half_ctrl)); | ||
3325 | |||
3326 | /* write to broadcast reg */ | ||
3327 | gk20a_writel(g, gr_gpcs_tpcs_sm_debug_sfe_control_r(), reg_val); | ||
3328 | } | ||
3329 | |||
3330 | static bool pri_is_egpc_addr_shared(struct gk20a *g, u32 addr) | ||
3331 | { | ||
3332 | u32 egpc_shared_base = EGPC_PRI_SHARED_BASE; | ||
3333 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
3334 | |||
3335 | return (addr >= egpc_shared_base) && | ||
3336 | (addr < egpc_shared_base + gpc_stride); | ||
3337 | } | ||
3338 | |||
3339 | bool gv11b_gr_pri_is_egpc_addr(struct gk20a *g, u32 addr) | ||
3340 | { | ||
3341 | u32 egpc_base = g->ops.gr.get_egpc_base(g); | ||
3342 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
3343 | u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); | ||
3344 | |||
3345 | return ((addr >= egpc_base) && | ||
3346 | (addr < egpc_base + num_gpcs * gpc_stride)) || | ||
3347 | pri_is_egpc_addr_shared(g, addr); | ||
3348 | } | ||
3349 | |||
3350 | static inline u32 pri_smpc_in_etpc_addr_mask(struct gk20a *g, u32 addr) | ||
3351 | { | ||
3352 | u32 smpc_stride = nvgpu_get_litter_value(g, | ||
3353 | GPU_LIT_SMPC_PRI_STRIDE); | ||
3354 | |||
3355 | return (addr & (smpc_stride - 1)); | ||
3356 | } | ||
3357 | |||
3358 | static u32 pri_smpc_ext_addr(struct gk20a *g, u32 sm_offset, u32 gpc_num, | ||
3359 | u32 tpc_num, u32 sm_num) | ||
3360 | { | ||
3361 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
3362 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, | ||
3363 | GPU_LIT_TPC_IN_GPC_BASE); | ||
3364 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
3365 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
3366 | u32 egpc_base = g->ops.gr.get_egpc_base(g); | ||
3367 | u32 smpc_unique_base = nvgpu_get_litter_value(g, | ||
3368 | GPU_LIT_SMPC_PRI_UNIQUE_BASE); | ||
3369 | u32 smpc_stride = nvgpu_get_litter_value(g, | ||
3370 | GPU_LIT_SMPC_PRI_STRIDE); | ||
3371 | |||
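/* compose the extended (unicast) SMPC address from the EGPC base plus the GPC, TPC and SM strides */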
3372 | return (egpc_base + (gpc_num * gpc_stride) + tpc_in_gpc_base + | ||
3373 | (tpc_num * tpc_in_gpc_stride) + | ||
3374 | (sm_num * smpc_stride) + | ||
3375 | (smpc_unique_base + sm_offset)); | ||
3376 | } | ||
3377 | |||
3378 | static bool pri_is_smpc_addr_in_etpc_shared(struct gk20a *g, u32 addr) | ||
3379 | { | ||
3380 | u32 smpc_shared_base = nvgpu_get_litter_value(g, | ||
3381 | GPU_LIT_SMPC_PRI_SHARED_BASE); | ||
3382 | u32 smpc_stride = nvgpu_get_litter_value(g, | ||
3383 | GPU_LIT_SMPC_PRI_STRIDE); | ||
3384 | |||
3385 | return (addr >= smpc_shared_base) && | ||
3386 | (addr < smpc_shared_base + smpc_stride); | ||
3387 | } | ||
3388 | |||
3389 | bool gv11b_gr_pri_is_etpc_addr(struct gk20a *g, u32 addr) | ||
3390 | { | ||
3391 | u32 egpc_addr = 0; | ||
3392 | |||
3393 | if (g->ops.gr.is_egpc_addr(g, addr)) { | ||
3394 | egpc_addr = pri_gpccs_addr_mask(addr); | ||
3395 | if (g->ops.gr.is_tpc_addr(g, egpc_addr)) | ||
3396 | return true; | ||
3397 | } | ||
3398 | |||
3399 | return false; | ||
3400 | } | ||
3401 | |||
3402 | static u32 pri_get_egpc_num(struct gk20a *g, u32 addr) | ||
3403 | { | ||
3404 | u32 i, start; | ||
3405 | u32 egpc_base = g->ops.gr.get_egpc_base(g); | ||
3406 | u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); | ||
3407 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
3408 | |||
3409 | for (i = 0; i < num_gpcs; i++) { | ||
3410 | start = egpc_base + (i * gpc_stride); | ||
3411 | if ((addr >= start) && (addr < (start + gpc_stride))) | ||
3412 | return i; | ||
3413 | } | ||
3414 | return 0; | ||
3415 | } | ||
3416 | |||
3417 | static u32 pri_egpc_addr(struct gk20a *g, u32 addr, u32 gpc) | ||
3418 | { | ||
3419 | u32 egpc_base = g->ops.gr.get_egpc_base(g); | ||
3420 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
3421 | |||
3422 | return egpc_base + (gpc * gpc_stride) + addr; | ||
3423 | } | ||
3424 | |||
3425 | static u32 pri_etpc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 tpc) | ||
3426 | { | ||
3427 | u32 egpc_base = g->ops.gr.get_egpc_base(g); | ||
3428 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
3429 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, | ||
3430 | GPU_LIT_TPC_IN_GPC_BASE); | ||
3431 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
3432 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
3433 | |||
3434 | return egpc_base + (gpc * gpc_stride) + | ||
3435 | tpc_in_gpc_base + (tpc * tpc_in_gpc_stride) + | ||
3436 | addr; | ||
3437 | } | ||
3438 | |||
3439 | void gv11b_gr_get_egpc_etpc_num(struct gk20a *g, u32 addr, | ||
3440 | u32 *egpc_num, u32 *etpc_num) | ||
3441 | { | ||
3442 | u32 egpc_addr = 0; | ||
3443 | |||
3444 | *egpc_num = pri_get_egpc_num(g, addr); | ||
3445 | egpc_addr = pri_gpccs_addr_mask(addr); | ||
3446 | *etpc_num = g->ops.gr.get_tpc_num(g, egpc_addr); | ||
3447 | |||
3448 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
3449 | "egpc_num = %d etpc_num = %d", *egpc_num, *etpc_num); | ||
3450 | } | ||
3451 | |||
3452 | int gv11b_gr_decode_egpc_addr(struct gk20a *g, u32 addr, int *addr_type, | ||
3453 | u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags) | ||
3454 | { | ||
3455 | u32 gpc_addr; | ||
3456 | u32 tpc_addr; | ||
3457 | |||
3458 | if (g->ops.gr.is_egpc_addr(g, addr)) { | ||
3459 | nvgpu_log_info(g, "addr=0x%x is egpc", addr); | ||
3460 | |||
3461 | *addr_type = CTXSW_ADDR_TYPE_EGPC; | ||
3462 | gpc_addr = pri_gpccs_addr_mask(addr); | ||
3463 | if (pri_is_egpc_addr_shared(g, addr)) { | ||
3464 | *broadcast_flags |= PRI_BROADCAST_FLAGS_EGPC; | ||
3465 | *gpc_num = 0; | ||
3466 | nvgpu_log_info(g, "shared egpc"); | ||
3467 | } else { | ||
3468 | *gpc_num = pri_get_egpc_num(g, addr); | ||
3469 | nvgpu_log_info(g, "gpc=0x%x", *gpc_num); | ||
3470 | } | ||
3471 | if (g->ops.gr.is_tpc_addr(g, gpc_addr)) { | ||
3472 | nvgpu_log_info(g, "addr=0x%x is etpc", addr); | ||
3473 | *addr_type = CTXSW_ADDR_TYPE_ETPC; | ||
3474 | if (pri_is_tpc_addr_shared(g, gpc_addr)) { | ||
3475 | *broadcast_flags |= PRI_BROADCAST_FLAGS_ETPC; | ||
3476 | *tpc_num = 0; | ||
3477 | nvgpu_log_info(g, "shared etpc"); | ||
3478 | } else { | ||
3479 | *tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); | ||
3480 | nvgpu_log_info(g, "tpc=0x%x", *tpc_num); | ||
3481 | } | ||
3482 | tpc_addr = pri_tpccs_addr_mask(addr); | ||
3483 | if (pri_is_smpc_addr_in_etpc_shared(g, tpc_addr)) | ||
3484 | *broadcast_flags |= PRI_BROADCAST_FLAGS_SMPC; | ||
3485 | } | ||
3486 | |||
3487 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
3488 | "addr_type = %d, broadcast_flags = %#08x", | ||
3489 | *addr_type, *broadcast_flags); | ||
3490 | return 0; | ||
3491 | } | ||
3492 | return -EINVAL; | ||
3493 | } | ||
3494 | |||
3495 | static void gv11b_gr_update_priv_addr_table_smpc(struct gk20a *g, u32 gpc_num, | ||
3496 | u32 tpc_num, u32 addr, | ||
3497 | u32 *priv_addr_table, u32 *t) | ||
3498 | { | ||
3499 | u32 sm_per_tpc, sm_num; | ||
3500 | |||
3501 | nvgpu_log_info(g, "broadcast flags smpc"); | ||
3502 | |||
3503 | sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); | ||
3504 | for (sm_num = 0; sm_num < sm_per_tpc; sm_num++) { | ||
3505 | priv_addr_table[*t] = pri_smpc_ext_addr(g, | ||
3506 | pri_smpc_in_etpc_addr_mask(g, addr), | ||
3507 | gpc_num, tpc_num, sm_num); | ||
3508 | nvgpu_log_info(g, "priv_addr_table[%d]:%#08x", | ||
3509 | *t, priv_addr_table[*t]); | ||
3510 | (*t)++; | ||
3511 | } | ||
3512 | } | ||
3513 | |||
3514 | void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, | ||
3515 | u32 gpc, u32 broadcast_flags, u32 *priv_addr_table, u32 *t) | ||
3516 | { | ||
3517 | u32 gpc_num, tpc_num; | ||
3518 | |||
3519 | nvgpu_log_info(g, "addr=0x%x", addr); | ||
3520 | |||
3521 | /* The GPC/TPC unicast registers are included in the compressed PRI | ||
3522 | * tables. Convert a GPC/TPC broadcast address to unicast addresses so | ||
3523 | * that we can look up the offsets. | ||
3524 | */ | ||
3525 | if (broadcast_flags & PRI_BROADCAST_FLAGS_EGPC) { | ||
3526 | nvgpu_log_info(g, "broadcast flags egpc"); | ||
3527 | for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { | ||
3528 | |||
3529 | if (broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) { | ||
3530 | nvgpu_log_info(g, "broadcast flags etpc"); | ||
3531 | for (tpc_num = 0; | ||
3532 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; | ||
3533 | tpc_num++) { | ||
3534 | if (broadcast_flags & | ||
3535 | PRI_BROADCAST_FLAGS_SMPC) { | ||
3536 | gv11b_gr_update_priv_addr_table_smpc( | ||
3537 | g, gpc_num, tpc_num, addr, | ||
3538 | priv_addr_table, t); | ||
3539 | } else { | ||
3540 | priv_addr_table[*t] = | ||
3541 | pri_etpc_addr(g, | ||
3542 | pri_tpccs_addr_mask(addr), | ||
3543 | gpc_num, tpc_num); | ||
3544 | nvgpu_log_info(g, | ||
3545 | "priv_addr_table[%d]:%#08x", | ||
3546 | *t, priv_addr_table[*t]); | ||
3547 | (*t)++; | ||
3548 | } | ||
3549 | } | ||
3550 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) { | ||
3551 | tpc_num = 0; | ||
3552 | gv11b_gr_update_priv_addr_table_smpc( | ||
3553 | g, gpc_num, tpc_num, addr, | ||
3554 | priv_addr_table, t); | ||
3555 | } else { | ||
3556 | priv_addr_table[*t] = | ||
3557 | pri_egpc_addr(g, | ||
3558 | pri_gpccs_addr_mask(addr), | ||
3559 | gpc_num); | ||
3560 | nvgpu_log_info(g, "priv_addr_table[%d]:%#08x", | ||
3561 | *t, priv_addr_table[*t]); | ||
3562 | (*t)++; | ||
3563 | } | ||
3564 | } | ||
3565 | } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_EGPC)) { | ||
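/* the EGPC part of the address is already unicast; only ETPC and/or SMPC may still be broadcast */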
3566 | if (broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) { | ||
3567 | nvgpu_log_info(g, "broadcast flags etpc but not egpc"); | ||
3568 | gpc_num = 0; | ||
3569 | for (tpc_num = 0; | ||
3570 | tpc_num < g->gr.gpc_tpc_count[gpc]; | ||
3571 | tpc_num++) { | ||
3572 | if (broadcast_flags & | ||
3573 | PRI_BROADCAST_FLAGS_SMPC) | ||
3574 | gv11b_gr_update_priv_addr_table_smpc( | ||
3575 | g, gpc_num, tpc_num, addr, | ||
3576 | priv_addr_table, t); | ||
3577 | else { | ||
3578 | priv_addr_table[*t] = | ||
3579 | pri_etpc_addr(g, | ||
3580 | pri_tpccs_addr_mask(addr), | ||
3581 | gpc, tpc_num); | ||
3582 | nvgpu_log_info(g, | ||
3583 | "priv_addr_table[%d]:%#08x", | ||
3584 | *t, priv_addr_table[*t]); | ||
3585 | (*t)++; | ||
3586 | } | ||
3587 | } | ||
3588 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) { | ||
3589 | tpc_num = 0; | ||
3590 | gpc_num = 0; | ||
3591 | gv11b_gr_update_priv_addr_table_smpc( | ||
3592 | g, gpc_num, tpc_num, addr, | ||
3593 | priv_addr_table, t); | ||
3594 | } else { | ||
3595 | priv_addr_table[*t] = addr; | ||
3596 | nvgpu_log_info(g, "priv_addr_table[%d]:%#08x", | ||
3597 | *t, priv_addr_table[*t]); | ||
3598 | (*t)++; | ||
3599 | } | ||
3600 | } | ||
3601 | } | ||
3602 | |||
3603 | u32 gv11b_gr_get_egpc_base(struct gk20a *g) | ||
3604 | { | ||
3605 | return EGPC_PRI_BASE; | ||
3606 | } | ||
3607 | |||
3608 | void gr_gv11b_init_gpc_mmu(struct gk20a *g) | ||
3609 | { | ||
3610 | u32 temp; | ||
3611 | |||
3612 | nvgpu_log_info(g, "initialize gpc mmu"); | ||
3613 | |||
3614 | if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { | ||
3615 | /* Bypass MMU check for non-secure boot. For | ||
3616 | * secure boot, this register write has no effect */ | ||
3617 | gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff); | ||
3618 | } | ||
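/* mirror the listed fields of fb_mmu_ctrl into the GPC MMU control register */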
3619 | temp = gk20a_readl(g, fb_mmu_ctrl_r()); | ||
3620 | temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() | | ||
3621 | gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() | | ||
3622 | gr_gpcs_pri_mmu_ctrl_vol_fault_m() | | ||
3623 | gr_gpcs_pri_mmu_ctrl_comp_fault_m() | | ||
3624 | gr_gpcs_pri_mmu_ctrl_miss_gran_m() | | ||
3625 | gr_gpcs_pri_mmu_ctrl_cache_mode_m() | | ||
3626 | gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | | ||
3627 | gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | | ||
3628 | gr_gpcs_pri_mmu_ctrl_mmu_disable_m(); | ||
3629 | gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); | ||
3630 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); | ||
3631 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); | ||
3632 | |||
3633 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), | ||
3634 | gk20a_readl(g, fb_mmu_debug_ctrl_r())); | ||
3635 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(), | ||
3636 | gk20a_readl(g, fb_mmu_debug_wr_r())); | ||
3637 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), | ||
3638 | gk20a_readl(g, fb_mmu_debug_rd_r())); | ||
3639 | } | ||