author	Seshendra Gadagottu <sgadagottu@nvidia.com>	2016-04-14 16:01:58 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-04-16 10:48:28 -0400
commit	c84ddceda648d6e47828115654ca7745010ec09f (patch)
tree	3dcd960110c55f694a16230323e77ac4d008970d	/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
parent	07cd80ab096346d64f93b051e80ba43c090deb09 (diff)
gpu: nvgpu: gv11b: sm priv reg related changes
Included all basic ops for gv11b and updated sm related functions to
include new priv register addresses.

Bug 1735757

Change-Id: Ie48651f918ee97fba00487111e4b28d6c95747f5
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1126961
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r--	drivers/gpu/nvgpu/gv11b/gr_gv11b.c	1743
1 file changed, 1741 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index d775aae8..f0736e19 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1,5 +1,5 @@
 /*
- * GV11B GPU GR
+ * GV11b GPU GR
  *
  * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
  *
@@ -21,11 +21,1750 @@
 #include "gk20a/semaphore_gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
 
-#include "gm20b/gr_gm20b.h" /* for MAXWELL classes */
+#include "gm20b/gr_gm20b.h"
 #include "gp10b/gr_gp10b.h"
 #include "gv11b/gr_gv11b.h"
+#include "hw_gr_gv11b.h"
+#include "hw_fifo_gv11b.h"
+#include "hw_proj_gv11b.h"
+#include "hw_ctxsw_prog_gv11b.h"
+#include "hw_mc_gv11b.h"
+#include <linux/vmalloc.h>

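/*
 * Class validation: accept the new Volta classes plus the Fermi,
 * Kepler, Maxwell and Pascal classes that GV11B still exposes.
 */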
static bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
{
	bool valid = false;

	switch (class_num) {
	case VOLTA_COMPUTE_A:
	case VOLTA_A:
	case VOLTA_DMA_COPY_A:
		valid = true;
		break;

	case MAXWELL_COMPUTE_B:
	case MAXWELL_B:
	case FERMI_TWOD_A:
	case KEPLER_DMA_COPY_A:
	case MAXWELL_DMA_COPY_A:
	case PASCAL_COMPUTE_A:
	case PASCAL_A:
	case PASCAL_DMA_COPY_A:
		valid = true;
		break;

	default:
		break;
	}
	gk20a_dbg_info("class=0x%x valid=%d", class_num, valid);
	return valid;
}

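/*
 * SM exception handling on top of the common gk20a path: harvest the
 * SM LRF and SHM ECC status, fold single-/double-bit error counts into
 * the t18x ECC statistics, then clear the HW counters and status.
 */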
static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
		bool *post_event, struct channel_gk20a *fault_ch)
{
	int ret = 0;
	u32 offset = proj_gpc_stride_v() * gpc +
			proj_tpc_in_gpc_stride_v() * tpc;
	u32 lrf_ecc_status, shm_ecc_status;

	gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch);

	/* Check for LRF ECC errors. */
	lrf_ecc_status = gk20a_readl(g,
		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset);
	if ((lrf_ecc_status &
		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f()) ||
	    (lrf_ecc_status &
		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f()) ||
	    (lrf_ecc_status &
		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f()) ||
	    (lrf_ecc_status &
		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f())) {

		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
			"Single bit error detected in SM LRF!");

		g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters[tpc] +=
			gk20a_readl(g,
				gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset);
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset,
			0);
	}
	if ((lrf_ecc_status &
		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f()) ||
	    (lrf_ecc_status &
		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f()) ||
	    (lrf_ecc_status &
		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f()) ||
	    (lrf_ecc_status &
		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f())) {

		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
			"Double bit error detected in SM LRF!");

		g->gr.t18x.ecc_stats.sm_lrf_double_err_count.counters[tpc] +=
			gk20a_readl(g,
				gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset);
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset,
			0);
	}
	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
		lrf_ecc_status);

	/* Check for SHM ECC errors. */
	shm_ecc_status = gk20a_readl(g,
		gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset);
	if ((shm_ecc_status &
		gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f()) ||
	    (shm_ecc_status &
		gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm1_pending_f()) ||
	    (shm_ecc_status &
		gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) ||
	    (shm_ecc_status &
		gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f())) {
		u32 ecc_stats_reg_val;

		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
			"Single bit error detected in SM SHM!");

		ecc_stats_reg_val =
			gk20a_readl(g,
				gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
		g->gr.t18x.ecc_stats.sm_shm_sec_count.counters[tpc] +=
			gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val);
		g->gr.t18x.ecc_stats.sm_shm_sed_count.counters[tpc] +=
			gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val);
		ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() |
			gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m());
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
			ecc_stats_reg_val);
	}
	if ((shm_ecc_status &
		gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) ||
	    (shm_ecc_status &
		gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f())) {
		u32 ecc_stats_reg_val;

		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
			"Double bit error detected in SM SHM!");

		ecc_stats_reg_val =
			gk20a_readl(g,
				gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
		g->gr.t18x.ecc_stats.sm_shm_ded_count.counters[tpc] +=
			gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val);
		ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m());
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
			ecc_stats_reg_val);
	}
	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset,
		shm_ecc_status);

	return ret;
}

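/*
 * TEX exception handling: on a pending SEC or DED ECC error, select
 * pipe 0 and pipe 1 in turn via the routing register, accumulate the
 * total/unique counters into the ECC statistics, clear them, restore
 * default routing, and acknowledge the ESR.
 */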
static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
		bool *post_event)
{
	int ret = 0;
	u32 offset = proj_gpc_stride_v() * gpc +
			proj_tpc_in_gpc_stride_v() * tpc;
	u32 esr;
	u32 ecc_stats_reg_val;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

	esr = gk20a_readl(g,
			gr_gpc0_tpc0_tex_m_hww_esr_r() + offset);
	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "0x%08x", esr);

	if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f()) {
		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
			"Single bit error detected in TEX!");

		/* Pipe 0 counters */
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
			gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f());

		ecc_stats_reg_val = gk20a_readl(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
		g->gr.t18x.ecc_stats.tex_total_sec_pipe0_count.counters[tpc] +=
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
			ecc_stats_reg_val);

		ecc_stats_reg_val = gk20a_readl(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
		g->gr.t18x.ecc_stats.tex_unique_sec_pipe0_count.counters[tpc] +=
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
			ecc_stats_reg_val);

		/* Pipe 1 counters */
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
			gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f());

		ecc_stats_reg_val = gk20a_readl(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
		g->gr.t18x.ecc_stats.tex_total_sec_pipe1_count.counters[tpc] +=
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
			ecc_stats_reg_val);

		ecc_stats_reg_val = gk20a_readl(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
		g->gr.t18x.ecc_stats.tex_unique_sec_pipe1_count.counters[tpc] +=
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
			ecc_stats_reg_val);

		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
			gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f());
	}
	if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f()) {
		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
			"Double bit error detected in TEX!");

		/* Pipe 0 counters */
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
			gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f());

		ecc_stats_reg_val = gk20a_readl(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
		g->gr.t18x.ecc_stats.tex_total_ded_pipe0_count.counters[tpc] +=
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
			ecc_stats_reg_val);

		ecc_stats_reg_val = gk20a_readl(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
		g->gr.t18x.ecc_stats.tex_unique_ded_pipe0_count.counters[tpc] +=
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
			ecc_stats_reg_val);

		/* Pipe 1 counters */
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
			gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f());

		ecc_stats_reg_val = gk20a_readl(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
		g->gr.t18x.ecc_stats.tex_total_ded_pipe1_count.counters[tpc] +=
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
			ecc_stats_reg_val);

		ecc_stats_reg_val = gk20a_readl(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
		g->gr.t18x.ecc_stats.tex_unique_ded_pipe1_count.counters[tpc] +=
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
			ecc_stats_reg_val);

		gk20a_writel(g,
			gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
			gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f());
	}

	gk20a_writel(g,
		gr_gpc0_tpc0_tex_m_hww_esr_r() + offset,
		esr);

	return ret;
}

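/*
 * Patch the per-PPC beta/alpha/steady-state circular-buffer sizes and
 * offsets into the channel context; the beta CB gets extra headroom
 * when the context uses graphics preemption (GFXP).
 */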
static int gr_gv11b_commit_global_cb_manager(struct gk20a *g,
			struct channel_gk20a *c, bool patch)
{
	struct gr_gk20a *gr = &g->gr;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
	u32 attrib_offset_in_chunk = 0;
	u32 alpha_offset_in_chunk = 0;
	u32 pd_ab_max_output;
	u32 gpc_index, ppc_index;
	u32 temp, temp2;
	u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate;
	u32 attrib_size_in_chunk, cb_attrib_cache_size_init;

	gk20a_dbg_fn("");

	if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) {
		attrib_size_in_chunk = gr->attrib_cb_default_size +
			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
			 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
		cb_attrib_cache_size_init = gr->attrib_cb_default_size +
			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
			 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
	} else {
		attrib_size_in_chunk = gr->attrib_cb_size;
		cb_attrib_cache_size_init = gr->attrib_cb_default_size;
	}

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(),
		gr->attrib_cb_default_size, patch);
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_alpha_r(),
		gr->alpha_cb_default_size, patch);

	pd_ab_max_output = (gr->alpha_cb_default_size *
		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
		gr_pd_ab_dist_cfg1_max_output_granularity_v();

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);

	attrib_offset_in_chunk = alpha_offset_in_chunk +
		gr->tpc_count * gr->alpha_cb_size;

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		temp = proj_gpc_stride_v() * gpc_index;
		temp2 = proj_scal_litter_num_pes_per_gpc_v() * gpc_index;
		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
		     ppc_index++) {
			cbm_cfg_size_beta = cb_attrib_cache_size_init *
				gr->pes_tpc_count[ppc_index][gpc_index];
			cbm_cfg_size_alpha = gr->alpha_cb_default_size *
				gr->pes_tpc_count[ppc_index][gpc_index];
			cbm_cfg_size_steadystate = gr->attrib_cb_default_size *
				gr->pes_tpc_count[ppc_index][gpc_index];

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
				proj_ppc_in_gpc_stride_v() * ppc_index,
				cbm_cfg_size_beta, patch);

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
				proj_ppc_in_gpc_stride_v() * ppc_index,
				attrib_offset_in_chunk, patch);

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
				proj_ppc_in_gpc_stride_v() * ppc_index,
				cbm_cfg_size_steadystate,
				patch);

			attrib_offset_in_chunk += attrib_size_in_chunk *
				gr->pes_tpc_count[ppc_index][gpc_index];

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
				proj_ppc_in_gpc_stride_v() * ppc_index,
				cbm_cfg_size_alpha, patch);

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
				proj_ppc_in_gpc_stride_v() * ppc_index,
				alpha_offset_in_chunk, patch);

			alpha_offset_in_chunk += gr->alpha_cb_size *
				gr->pes_tpc_count[ppc_index][gpc_index];

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
				gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate),
				patch);
		}
	}

	return 0;
}

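/* Program the page pool base and size into both the SCC and GCC copies. */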
static void gr_gv11b_commit_global_pagepool(struct gk20a *g,
					    struct channel_ctx_gk20a *ch_ctx,
					    u64 addr, u32 size, bool patch)
{
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(),
		gr_scc_pagepool_base_addr_39_8_f(addr), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(),
		gr_scc_pagepool_total_pages_f(size) |
		gr_scc_pagepool_valid_true_f(), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(),
		gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(),
		gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
}

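/*
 * Add a ZBC color entry: update the L2 table via the LTC ops, load the
 * DS table through the ZBC load trigger, keep a local shadow copy, and
 * program the SWDX DSS registers; the 7-bit format fields are packed
 * four entries per 32-bit format register, hence the (index % 4) * 7
 * shifts below.
 */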
static int gr_gv11b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
				  struct zbc_entry *color_val, u32 index)
{
	u32 i;
	u32 zbc_c;

	/* update l2 table */
	g->ops.ltc.set_zbc_color_entry(g, color_val, index);

	/* update ds table */
	gk20a_writel(g, gr_ds_zbc_color_r_r(),
		gr_ds_zbc_color_r_val_f(color_val->color_ds[0]));
	gk20a_writel(g, gr_ds_zbc_color_g_r(),
		gr_ds_zbc_color_g_val_f(color_val->color_ds[1]));
	gk20a_writel(g, gr_ds_zbc_color_b_r(),
		gr_ds_zbc_color_b_val_f(color_val->color_ds[2]));
	gk20a_writel(g, gr_ds_zbc_color_a_r(),
		gr_ds_zbc_color_a_val_f(color_val->color_ds[3]));

	gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
		gr_ds_zbc_color_fmt_val_f(color_val->format));

	gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
		gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));

	/* trigger the write */
	gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
		gr_ds_zbc_tbl_ld_select_c_f() |
		gr_ds_zbc_tbl_ld_action_write_f() |
		gr_ds_zbc_tbl_ld_trigger_active_f());

	/* update local copy */
	for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
		gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i];
		gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i];
	}
	gr->zbc_col_tbl[index].format = color_val->format;
	gr->zbc_col_tbl[index].ref_cnt++;

	gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_r_r(index),
		color_val->color_ds[0]);
	gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_g_r(index),
		color_val->color_ds[1]);
	gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_b_r(index),
		color_val->color_ds[2]);
	gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_a_r(index),
		color_val->color_ds[3]);
	zbc_c = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3));
	zbc_c &= ~(0x7f << ((index % 4) * 7));
	zbc_c |= color_val->format << ((index % 4) * 7);
	gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3), zbc_c);

	return 0;
}

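/* Depth counterpart of gr_gv11b_add_zbc_color(), using the Z tables. */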
static int gr_gv11b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
				  struct zbc_entry *depth_val, u32 index)
{
	u32 zbc_z;

	/* update l2 table */
	g->ops.ltc.set_zbc_depth_entry(g, depth_val, index);

	/* update ds table */
	gk20a_writel(g, gr_ds_zbc_z_r(),
		gr_ds_zbc_z_val_f(depth_val->depth));

	gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
		gr_ds_zbc_z_fmt_val_f(depth_val->format));

	gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
		gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));

	/* trigger the write */
	gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
		gr_ds_zbc_tbl_ld_select_z_f() |
		gr_ds_zbc_tbl_ld_action_write_f() |
		gr_ds_zbc_tbl_ld_trigger_active_f());

	/* update local copy */
	gr->zbc_dep_tbl[index].depth = depth_val->depth;
	gr->zbc_dep_tbl[index].format = depth_val->format;
	gr->zbc_dep_tbl[index].ref_cnt++;

	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_r(index), depth_val->depth);
	zbc_z = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index & ~3));
	zbc_z &= ~(0x7f << ((index % 4) * 7));
	zbc_z |= depth_val->format << ((index % 4) * 7);
	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index & ~3), zbc_z);

	return 0;
}

static u32 gr_gv11b_pagepool_default_size(struct gk20a *g)
{
	return gr_scc_pagepool_total_pages_hwmax_value_v();
}

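/*
 * Global context buffer size: clamp the attrib/alpha CB sizes to the
 * register field maxima per TPC, scale by granularity and the maximum
 * TPC count, and align the sum to 128 bytes.
 */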
static int gr_gv11b_calc_global_ctx_buffer_size(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	int size;

	gr->attrib_cb_size = gr->attrib_cb_default_size;
	gr->alpha_cb_size = gr->alpha_cb_default_size;

	gr->attrib_cb_size = min(gr->attrib_cb_size,
		gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~0) / g->gr.tpc_count);
	gr->alpha_cb_size = min(gr->alpha_cb_size,
		gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~0) / g->gr.tpc_count);

	size = gr->attrib_cb_size *
		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
		gr->max_tpc_count;

	size += gr->alpha_cb_size *
		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
		gr->max_tpc_count;

	size = ALIGN(size, 128);

	return size;
}

static void gr_gv11b_set_go_idle_timeout(struct gk20a *g, u32 data)
{
	gk20a_writel(g, gr_fe_go_idle_timeout_r(), data);
}

static void gr_gv11b_set_coalesce_buffer_size(struct gk20a *g, u32 data)
{
	u32 val;

	gk20a_dbg_fn("");

	val = gk20a_readl(g, gr_gpcs_tc_debug0_r());
	val = set_field(val, gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(),
			gr_gpcs_tc_debug0_limit_coalesce_buffer_size_f(data));
	gk20a_writel(g, gr_gpcs_tc_debug0_r(), val);

	gk20a_dbg_fn("done");
}

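/*
 * SW method dispatch by class. The method offset arrives in 32-bit
 * words, so it is shifted back to a byte address before the compare;
 * only the Pascal graphics/compute classes carry SW methods here.
 */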
static int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr,
				     u32 class_num, u32 offset, u32 data)
{
	gk20a_dbg_fn("");

	if (class_num == PASCAL_COMPUTE_A) {
		switch (offset << 2) {
		case NVC0C0_SET_SHADER_EXCEPTIONS:
			gk20a_gr_set_shader_exceptions(g, data);
			break;
		default:
			goto fail;
		}
	}

	if (class_num == PASCAL_A) {
		switch (offset << 2) {
		case NVC097_SET_SHADER_EXCEPTIONS:
			gk20a_gr_set_shader_exceptions(g, data);
			break;
		case NVC097_SET_CIRCULAR_BUFFER_SIZE:
			g->ops.gr.set_circular_buffer_size(g, data);
			break;
		case NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
			g->ops.gr.set_alpha_circular_buffer_size(g, data);
			break;
		case NVC097_SET_GO_IDLE_TIMEOUT:
			gr_gv11b_set_go_idle_timeout(g, data);
			break;
		case NVC097_SET_COALESCE_BUFFER_SIZE:
			gr_gv11b_set_coalesce_buffer_size(g, data);
			break;
		default:
			goto fail;
		}
	}
	return 0;

fail:
	return -EINVAL;
}

static void gr_gv11b_cb_size_default(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;

	if (!gr->attrib_cb_default_size)
		gr->attrib_cb_default_size = 0x800;
	gr->alpha_cb_default_size =
		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
}

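/*
 * Runtime alpha CB resize: clamp the request, update the constraint
 * logic and PD max-output, then scale the per-PPC size by each PES's
 * TPC count.
 */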
static void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_index, ppc_index, stride, val;
	u32 pd_ab_max_output;
	u32 alpha_cb_size = data * 4;

	gk20a_dbg_fn("");

	if (alpha_cb_size > gr->alpha_cb_size)
		alpha_cb_size = gr->alpha_cb_size;

	gk20a_writel(g, gr_ds_tga_constraintlogic_alpha_r(),
		(gk20a_readl(g, gr_ds_tga_constraintlogic_alpha_r()) &
		 ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
		gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));

	pd_ab_max_output = alpha_cb_size *
		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() /
		gr_pd_ab_dist_cfg1_max_output_granularity_v();

	gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
		gr_pd_ab_dist_cfg1_max_batches_init_f());

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		stride = proj_gpc_stride_v() * gpc_index;

		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
		     ppc_index++) {

			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index);

			val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
				gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
					gr->pes_tpc_count[ppc_index][gpc_index]));

			gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index, val);
		}
	}
}

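/*
 * Runtime beta (attribute) CB resize. When the programmed beta CB size
 * differs from the steady-state size, GFXP headroom is in use, and the
 * new size keeps that headroom on top of the steady-state value that
 * is written to the constraint logic and SWDX copies.
 */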
static void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_index, ppc_index, stride, val;
	u32 cb_size_steady = data * 4, cb_size;

	gk20a_dbg_fn("");

	if (cb_size_steady > gr->attrib_cb_size)
		cb_size_steady = gr->attrib_cb_size;
	if (gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) !=
	    gk20a_readl(g,
		gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r())) {
		cb_size = cb_size_steady +
			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
			 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
	} else {
		cb_size = cb_size_steady;
	}

	gk20a_writel(g, gr_ds_tga_constraintlogic_beta_r(),
		(gk20a_readl(g, gr_ds_tga_constraintlogic_beta_r()) &
		 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
		gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady));

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		stride = proj_gpc_stride_v() * gpc_index;

		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
		     ppc_index++) {

			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index);

			val = set_field(val,
				gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
				gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
					gr->pes_tpc_count[ppc_index][gpc_index]));

			gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index, val);

			gk20a_writel(g, proj_ppc_in_gpc_stride_v() * ppc_index +
				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() +
				stride,
				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_f(
					cb_size_steady));

			val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
						ppc_index + gpc_index));

			val = set_field(val,
				gr_gpcs_swdx_tc_beta_cb_size_v_m(),
				gr_gpcs_swdx_tc_beta_cb_size_v_f(
					cb_size_steady *
					gr->gpc_ppc_count[gpc_index]));

			gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
						ppc_index + gpc_index), val);
		}
	}
}

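/*
 * On top of the common ctx-state init, query the FECS ucode once for
 * the preemption context image size and cache it in t18x.ctx_vars.
 */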
static int gr_gv11b_init_ctx_state(struct gk20a *g)
{
	struct fecs_method_op_gk20a op = {
		.mailbox = { .id = 0, .data = 0,
			     .clr = ~0, .ok = 0, .fail = 0},
		.method.data = 0,
		.cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
		.cond.fail = GR_IS_UCODE_OP_SKIP,
	};
	int err;

	gk20a_dbg_fn("");

	err = gr_gk20a_init_ctx_state(g);
	if (err)
		return err;

	if (!g->gr.t18x.ctx_vars.preempt_image_size) {
		op.method.addr =
			gr_fecs_method_push_adr_discover_preemption_image_size_v();
		op.mailbox.ret = &g->gr.t18x.ctx_vars.preempt_image_size;
		err = gr_gk20a_submit_fecs_method_op(g, op, false);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				  "query preempt image size failed");
			return err;
		}
	}

	gk20a_dbg_info("preempt image size: %u",
		g->gr.t18x.ctx_vars.preempt_image_size);

	gk20a_dbg_fn("done");

	return 0;
}

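/* Allocate and GPU-map a ctxsw buffer; used by the GFXP allocations below. */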
int gr_gv11b_alloc_buffer(struct vm_gk20a *vm, size_t size,
			  struct mem_desc *mem)
{
	int err;

	gk20a_dbg_fn("");

	err = gk20a_gmmu_alloc_attr(vm->mm->g, 0, size, mem);
	if (err)
		return err;

	mem->gpu_va = gk20a_gmmu_map(vm,
				     &mem->sgt,
				     size,
				     NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
				     gk20a_mem_flag_none,
				     false);

	if (!mem->gpu_va) {
		err = -ENOMEM;
		goto fail_free;
	}

	return 0;

fail_free:
	gk20a_gmmu_free(vm->mm->g, mem);
	return err;
}

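/*
 * GR context allocation. For GFXP, also allocate the preempt image,
 * spill, betacb and pagepool ctxsw buffers (freed in reverse order on
 * failure); compute contexts pick CILP or CTA preemption from flags.
 */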
static int gr_gv11b_alloc_gr_ctx(struct gk20a *g,
			struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
			u32 class,
			u32 flags)
{
	int err;

	gk20a_dbg_fn("");

	err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags);
	if (err)
		return err;

	(*gr_ctx)->t18x.ctx_id_valid = false;

	if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
		flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;

	if (class == PASCAL_COMPUTE_A &&
	    g->gr.t18x.ctx_vars.force_preemption_cilp)
		flags |= NVGPU_ALLOC_OBJ_FLAGS_CILP;

	if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) {
		u32 spill_size =
			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
		u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
			gr_scc_pagepool_total_pages_byte_granularity_v();
		u32 betacb_size = g->gr.attrib_cb_default_size +
			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
			 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
		u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
			gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
			g->gr.max_tpc_count;
		attrib_cb_size = ALIGN(attrib_cb_size, 128);

		gk20a_dbg_info("gfxp context spill_size=%d", spill_size);
		gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size);
		gk20a_dbg_info("gfxp context attrib_cb_size=%d",
			attrib_cb_size);
		err = gr_gv11b_alloc_buffer(vm,
			g->gr.t18x.ctx_vars.preempt_image_size,
			&(*gr_ctx)->t18x.preempt_ctxsw_buffer);
		if (err) {
			gk20a_err(dev_from_gk20a(vm->mm->g),
				  "cannot allocate preempt buffer");
			goto fail_free_gk20a_ctx;
		}

		err = gr_gv11b_alloc_buffer(vm,
			spill_size,
			&(*gr_ctx)->t18x.spill_ctxsw_buffer);
		if (err) {
			gk20a_err(dev_from_gk20a(vm->mm->g),
				  "cannot allocate spill buffer");
			goto fail_free_preempt;
		}

		err = gr_gv11b_alloc_buffer(vm,
			attrib_cb_size,
			&(*gr_ctx)->t18x.betacb_ctxsw_buffer);
		if (err) {
			gk20a_err(dev_from_gk20a(vm->mm->g),
				  "cannot allocate beta buffer");
			goto fail_free_spill;
		}

		err = gr_gv11b_alloc_buffer(vm,
			pagepool_size,
			&(*gr_ctx)->t18x.pagepool_ctxsw_buffer);
		if (err) {
			gk20a_err(dev_from_gk20a(vm->mm->g),
				  "cannot allocate page pool");
			goto fail_free_betacb;
		}

		(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_GFXP;
	}

	if (class == PASCAL_COMPUTE_A) {
		if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP)
			(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CILP;
		else
			(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CTA;
	}

	gk20a_dbg_fn("done");

	return err;

fail_free_betacb:
	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.betacb_ctxsw_buffer);
fail_free_spill:
	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.spill_ctxsw_buffer);
fail_free_preempt:
	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.preempt_ctxsw_buffer);
fail_free_gk20a_ctx:
	gr_gk20a_free_gr_ctx(g, vm, *gr_ctx);
	*gr_ctx = NULL;

	return err;
}

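/* Map the context image and log its save-operation counters for debug. */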
static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
				  struct gr_ctx_desc *gr_ctx)
{
	void *ctx_ptr = vmap(gr_ctx->mem.pages,
			PAGE_ALIGN(gr_ctx->mem.size) >> PAGE_SHIFT,
			0, pgprot_writecombine(PAGE_KERNEL));
	if (!ctx_ptr) {
		WARN_ON("Cannot map context");
		return;
	}
	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_magic_value_o : %x (expect %x)\n",
		gk20a_mem_rd32(ctx_ptr +
			ctxsw_prog_main_image_magic_value_o(), 0),
		ctxsw_prog_main_image_magic_value_v_value_v());

	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x\n",
		gk20a_mem_rd32(ctx_ptr +
			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 0));

	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x\n",
		gk20a_mem_rd32(ctx_ptr +
			ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 0));

	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_control : %x\n",
		gk20a_mem_rd32(ctx_ptr +
			ctxsw_prog_main_image_context_timestamp_buffer_control_o(), 0));

	gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n",
		gk20a_mem_rd32(ctx_ptr +
			ctxsw_prog_main_image_num_save_ops_o(), 0));
	gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n",
		gk20a_mem_rd32(ctx_ptr +
			ctxsw_prog_main_image_num_wfi_save_ops_o(), 0));
	gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n",
		gk20a_mem_rd32(ctx_ptr +
			ctxsw_prog_main_image_num_cta_save_ops_o(), 0));
	gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n",
		gk20a_mem_rd32(ctx_ptr +
			ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0));
	gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n",
		gk20a_mem_rd32(ctx_ptr +
			ctxsw_prog_main_image_num_cilp_save_ops_o(), 0));
	gk20a_err(dev_from_gk20a(g),
		"image gfx preemption option (GFXP is 1) %x\n",
		gk20a_mem_rd32(ctx_ptr +
			ctxsw_prog_main_image_graphics_preemption_options_o(),
			0));
	vunmap(ctx_ptr);
}

static void gr_gv11b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
				 struct gr_ctx_desc *gr_ctx)
{
	gk20a_dbg_fn("");

	if (!gr_ctx)
		return;

	if (g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close)
		dump_ctx_switch_stats(g, vm, gr_ctx);

	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
	gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
	gk20a_dbg_fn("done");
}

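/*
 * Write the preemption mode into the context image and, when a preempt
 * buffer exists, patch the attrib CB, page pool, spill buffer and CBES
 * reserve registers with the ctxsw buffer addresses, pre-shifted to
 * each register's alignment.
 */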
static void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
		struct channel_ctx_gk20a *ch_ctx,
		void *ctx_ptr)
{
	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
	u32 gfxp_preempt_option =
		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
	u32 cilp_preempt_option =
		ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
	int err;

	gk20a_dbg_fn("");

	if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) {
		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0,
			gfxp_preempt_option);
	}

	if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_CILP) {
		gk20a_dbg_info("CILP: %x", cilp_preempt_option);
		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0,
			cilp_preempt_option);
	}

	if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) {
		u32 addr;
		u32 size;
		u32 cbes_reserve;

		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_full_preemption_ptr_o(), 0,
			gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);

		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);

		addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >>
			gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
		       (u64_hi32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) <<
			(32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));

		gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
		g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);

		addr = (u64_lo32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) >>
			gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
		       (u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
			(32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
		size = gr_ctx->t18x.pagepool_ctxsw_buffer.size;

		if (size == g->ops.gr.pagepool_default_size(g))
			size = gr_scc_pagepool_total_pages_hwmax_v();

		g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);

		addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >>
			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
		       (u64_hi32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) <<
			(32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
		size = gr_ctx->t18x.spill_ctxsw_buffer.size /
			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();

		gr_gk20a_ctx_patch_write(g, ch_ctx,
			gr_gpc0_swdx_rm_spill_buffer_addr_r(),
			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
			true);
		gr_gk20a_ctx_patch_write(g, ch_ctx,
			gr_gpc0_swdx_rm_spill_buffer_size_r(),
			gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
			true);

		cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
		gr_gk20a_ctx_patch_write(g, ch_ctx,
			gr_gpcs_swdx_beta_cb_ctrl_r(),
			gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
				cbes_reserve),
			true);
		gr_gk20a_ctx_patch_write(g, ch_ctx,
			gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
			gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
				cbes_reserve),
			true);

		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
	}

	gk20a_dbg_fn("done");
}

static int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
					struct gk20a_debug_output *o)
{
	struct gr_gk20a *gr = &g->gr;

	gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n",
		gk20a_readl(g, gr_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n",
		gk20a_readl(g, gr_status_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_STATUS2: 0x%x\n",
		gk20a_readl(g, gr_status_2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n",
		gk20a_readl(g, gr_engine_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n",
		gk20a_readl(g, gr_gpfifo_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n",
		gk20a_readl(g, gr_gpfifo_ctl_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n",
		gk20a_readl(g, gr_fecs_host_int_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION : 0x%x\n",
		gk20a_readl(g, gr_exception_r()));
	gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n",
		gk20a_readl(g, gr_fecs_intr_r()));
	gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n",
		gk20a_readl(g, fifo_engine_status_r(ENGINE_GR_GK20A)));
	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n",
		gk20a_readl(g, gr_activity_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY2: 0x%x\n",
		gk20a_readl(g, gr_activity_2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n",
		gk20a_readl(g, gr_activity_4_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n",
		gk20a_readl(g, gr_pri_sked_activity_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
	if (gr->gpc_tpc_count[0] == 2)
		gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
			gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n",
		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n",
		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n",
		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
	if (gr->gpc_tpc_count[0] == 2)
		gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
			gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_be0_becs_be_activity0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_be1_becs_be_activity0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n",
		gk20a_readl(g, gr_pri_bes_becs_be_activity0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n",
		gk20a_readl(g, gr_pri_ds_mpipe_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n",
		gk20a_readl(g, gr_fe_go_idle_timeout_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n",
		gk20a_readl(g, gr_pri_fe_go_idle_info_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n",
		gk20a_readl(g, gr_cwd_fs_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS: 0x%x\n",
		gk20a_readl(g, gr_fe_tpc_fs_r(0)));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID(0): 0x%x\n",
		gk20a_readl(g, gr_cwd_gpc_tpc_id_r(0)));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n",
		gk20a_readl(g, gr_cwd_sm_id_r(0)));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n",
		gk20a_readl(g, gr_fecs_ctxsw_status_fe_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n",
		gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n",
		gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n",
		gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n",
		gk20a_readl(g, gr_fecs_ctxsw_idlestate_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n",
		gk20a_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n",
		gk20a_readl(g, gr_fecs_current_ctx_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n",
		gk20a_readl(g, gr_fecs_new_ctx_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n",
		gk20a_readl(g, gr_pri_be0_crop_status1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n",
		gk20a_readl(g, gr_pri_bes_crop_status1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS : 0x%x\n",
		gk20a_readl(g, gr_pri_be0_zrop_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS2 : 0x%x\n",
		gk20a_readl(g, gr_pri_be0_zrop_status2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS : 0x%x\n",
		gk20a_readl(g, gr_pri_bes_zrop_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n",
		gk20a_readl(g, gr_pri_bes_zrop_status2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION: 0x%x\n",
		gk20a_readl(g, gr_pri_be0_becs_be_exception_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN: 0x%x\n",
		gk20a_readl(g, gr_pri_be0_becs_be_exception_en_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
	return 0;
}

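/*
 * Each GR activity register packs one 3-bit status per unit; the
 * engine counts as idle only if every unit reads EMPTY or PREEMPTED.
 * Used by gr_gv11b_wait_empty() below to poll for quiescence.
 */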
static bool gr_activity_empty_or_preempted(u32 val)
{
	while (val) {
		u32 v = val & 7;

		if (v != gr_activity_4_gpc0_empty_v() &&
		    v != gr_activity_4_gpc0_preempted_v())
			return false;
		val >>= 3;
	}

	return true;
}

static int gr_gv11b_wait_empty(struct gk20a *g, unsigned long end_jiffies,
			       u32 expect_delay)
{
	u32 delay = expect_delay;
	bool gr_enabled;
	bool ctxsw_active;
	bool gr_busy;
	u32 gr_status;
	u32 activity0, activity1, activity2, activity4;

	gk20a_dbg_fn("");

	do {
		/* fmodel: host gets fifo_engine_status(gr) from gr
		   only when gr_status is read */
		gr_status = gk20a_readl(g, gr_status_r());

		gr_enabled = gk20a_readl(g, mc_enable_r()) &
			mc_enable_pgraph_enabled_f();

		ctxsw_active = gr_status & 1 << 7;

		activity0 = gk20a_readl(g, gr_activity_0_r());
		activity1 = gk20a_readl(g, gr_activity_1_r());
		activity2 = gk20a_readl(g, gr_activity_2_r());
		activity4 = gk20a_readl(g, gr_activity_4_r());

		gr_busy = !(gr_activity_empty_or_preempted(activity0) &&
			    gr_activity_empty_or_preempted(activity1) &&
			    activity2 == 0 &&
			    gr_activity_empty_or_preempted(activity4));

		if (!gr_enabled || (!gr_busy && !ctxsw_active)) {
			gk20a_dbg_fn("done");
			return 0;
		}

		usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);

	} while (time_before(jiffies, end_jiffies)
			|| !tegra_platform_is_silicon());

	gk20a_err(dev_from_gk20a(g),
		"timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x",
		ctxsw_active, gr_busy, activity0, activity1, activity2, activity4);

	return -EAGAIN;
}

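/*
 * Attribute CB commit: size from the betacb ctxsw buffer when
 * preemption buffers exist, then program the MPC and TEX RM copies in
 * addition to the common gm20b commit.
 */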
static void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
		struct channel_ctx_gk20a *ch_ctx,
		u64 addr, bool patch)
{
	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
	int attrBufferSize;

	if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va)
		attrBufferSize = gr_ctx->t18x.betacb_ctxsw_buffer.size;
	else
		attrBufferSize = g->ops.gr.calc_global_ctx_buffer_size(g);

	attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();

	gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
		gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
		gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) |
		gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
}

static void gr_gv11b_commit_global_bundle_cb(struct gk20a *g,
		struct channel_ctx_gk20a *ch_ctx,
		u64 addr, u64 size, bool patch)
{
	u32 data;

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
		gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
		gr_scc_bundle_cb_size_div_256b_f(size) |
		gr_scc_bundle_cb_size_valid_true_f(), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
		gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
		gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
		gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);

	/* data for state_limit */
	data = (g->gr.bundle_cb_default_size *
		gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
		gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();

	data = min_t(u32, data, g->gr.min_gpm_fifo_depth);

	gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
		g->gr.bundle_cb_token_limit, data);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
		gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
}

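/*
 * GV11B-specific floorsweep setup: set the SM TEXIO out-of-range
 * address check to ARM 63:48 match mode and disable RE suppression
 * before the common gm20b floorsweep programming.
 */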
static int gr_gv11b_init_fs_state(struct gk20a *g)
{
	u32 data;

	data = gk20a_readl(g, gr_gpcs_tpcs_sm_texio_control_r());
	data = set_field(data, gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(),
			gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f());
	gk20a_writel(g, gr_gpcs_tpcs_sm_texio_control_r(), data);

	data = gk20a_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r());
	data = set_field(data, gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_m(),
			gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f());
	gk20a_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data);

	return gr_gm20b_ctx_state_floorsweep(g);
}

static void gr_gv11b_init_cyclestats(struct gk20a *g)
{
#if defined(CONFIG_GK20A_CYCLE_STATS)
	g->gpu_characteristics.flags |=
		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
	g->gpu_characteristics.flags |=
		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT;
#else
	(void)g;
#endif
}

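/*
 * Reflect the SW TPC mask into the fuse-bypass registers: a mask of
 * 0x1 (only TPC0 enabled) fuses off TPC1, 0x2 fuses off TPC0, and
 * anything else leaves both TPCs enabled.
 */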
static void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
{
	tegra_fuse_writel(0x1, FUSE_FUSEBYPASS_0);
	tegra_fuse_writel(0x0, FUSE_WRITE_ACCESS_SW_0);

	if (g->gr.gpc_tpc_mask[gpc_index] == 0x1)
		tegra_fuse_writel(0x2, FUSE_OPT_GPU_TPC0_DISABLE_0);
	else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2)
		tegra_fuse_writel(0x1, FUSE_OPT_GPU_TPC0_DISABLE_0);
	else
		tegra_fuse_writel(0x0, FUSE_OPT_GPU_TPC0_DISABLE_0);
}

static void gr_gv11b_get_access_map(struct gk20a *g,
				    u32 **whitelist, int *num_entries)
{
	static u32 wl_addr_gv11b[] = {
		/* this list must be sorted (low to high) */
		0x404468, /* gr_pri_mme_max_instructions       */
		0x418300, /* gr_pri_gpcs_rasterarb_line_class  */
		0x418800, /* gr_pri_gpcs_setup_debug           */
		0x418e00, /* gr_pri_gpcs_swdx_config           */
		0x418e40, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
		0x418e44, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
		0x418e48, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
		0x418e4c, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
		0x418e50, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
		0x418e58, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e5c, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e60, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e64, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e68, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e6c, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e70, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e74, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e78, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e7c, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e80, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e84, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e88, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e8c, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e90, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x418e94, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
		0x419864, /* gr_pri_gpcs_tpcs_pe_l2_evict_policy */
		0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg      */
		0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg     */
		0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */
		0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl     */
	};

	*whitelist = wl_addr_gv11b;
	*num_entries = ARRAY_SIZE(wl_addr_gv11b);
}

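/*
 * CILP helper: disable the faulting channel/TSG, restart runlist 0 so
 * the disable takes effect, then issue a preempt for the channel or
 * its TSG.
 */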
static int gr_gv11b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a *fault_ch)
{
	int ret = 0;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");

	ret = gk20a_disable_channel_tsg(g, fault_ch);
	if (ret) {
		gk20a_err(dev_from_gk20a(g),
			"CILP: failed to disable channel/TSG!\n");
		return ret;
	}

	ret = g->ops.fifo.update_runlist(g, 0, ~0, true, false);
	if (ret) {
		gk20a_err(dev_from_gk20a(g),
			"CILP: failed to restart runlist 0!");
		return ret;
	}

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: restarted runlist");

	if (gk20a_is_channel_marked_as_tsg(fault_ch))
		gk20a_fifo_issue_preempt(g, fault_ch->tsgid, true);
	else
		gk20a_fifo_issue_preempt(g, fault_ch->hw_chid, false);

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: preempted the channel/tsg");

	return ret;
}

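/*
 * Arm a CILP preempt: resolve and cache the ucode ctx_id, ask FECS
 * (via the sideband mailbox method) to raise a ctxsw interrupt when
 * the save completes, disable the channel, mark the preempt pending,
 * and post the PREEMPTION_STARTED event.
 */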
static int gr_gv11b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk20a *fault_ch)
{
	int ret;
	struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");

	if (!gr_ctx)
		return -EINVAL;

	if (gr_ctx->t18x.cilp_preempt_pending) {
		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
			"CILP is already pending for chid %d",
			fault_ch->hw_chid);
		return 0;
	}

	/* get ctx_id from the ucode image */
	if (!gr_ctx->t18x.ctx_id_valid) {
		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
			"CILP: looking up ctx id");
		ret = gr_gk20a_get_ctx_id(g, fault_ch, &gr_ctx->t18x.ctx_id);
		if (ret) {
			gk20a_err(dev_from_gk20a(g), "CILP: error looking up ctx id!\n");
			return ret;
		}
		gr_ctx->t18x.ctx_id_valid = true;
	}

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
		"CILP: ctx id is 0x%x", gr_ctx->t18x.ctx_id);

	/* send ucode method to set ctxsw interrupt */
	ret = gr_gk20a_submit_fecs_sideband_method_op(g,
		(struct fecs_method_op_gk20a) {
			.method.data = gr_ctx->t18x.ctx_id,
			.method.addr =
				gr_fecs_method_push_adr_configure_interrupt_completion_option_v(),
			.mailbox = {
				.id = 1 /* sideband */, .data = 0,
				.clr = ~0, .ret = NULL,
				.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
				.fail = 0},
			.cond.ok = GR_IS_UCODE_OP_EQUAL,
			.cond.fail = GR_IS_UCODE_OP_SKIP});

	if (ret) {
		gk20a_err(dev_from_gk20a(g),
			"CILP: failed to enable ctxsw interrupt!");
		return ret;
	}

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
		"CILP: enabled ctxsw completion interrupt");

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
		"CILP: disabling channel %d",
		fault_ch->hw_chid);

	ret = gr_gv11b_disable_channel_or_tsg(g, fault_ch);
	if (ret) {
		gk20a_err(dev_from_gk20a(g),
			"CILP: failed to disable channel!!");
		return ret;
	}

	/* set cilp_preempt_pending = true and record the channel */
	gr_ctx->t18x.cilp_preempt_pending = true;
	g->gr.t18x.cilp_preempt_pending_chid = fault_ch->hw_chid;

	if (gk20a_is_channel_marked_as_tsg(fault_ch)) {
		struct tsg_gk20a *tsg = &g->fifo.tsg[fault_ch->tsgid];

		gk20a_tsg_event_id_post_event(tsg,
			NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED);
	} else {
		gk20a_channel_event_id_post_event(fault_ch,
			NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED);
	}

	return 0;
}

static int gr_gv11b_clear_cilp_preempt_pending(struct gk20a *g,
					       struct channel_gk20a *fault_ch)
{
	struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");

	if (!gr_ctx)
		return -EINVAL;

	/* The ucode is self-clearing, so all we need to do here is
	   to clear cilp_preempt_pending. */
	if (!gr_ctx->t18x.cilp_preempt_pending) {
		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
			"CILP is already cleared for chid %d\n",
			fault_ch->hw_chid);
		return 0;
	}

	gr_ctx->t18x.cilp_preempt_pending = false;
	g->gr.t18x.cilp_preempt_pending_chid = -1;

	return 0;
}

1535/* @brief pre-process work on the SM exceptions to determine if we clear them or not.
1536 *
1537 * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing
1538 */
1539static int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
1540 u32 gpc, u32 tpc, u32 global_esr, u32 warp_esr,
1541 bool sm_debugger_attached, struct channel_gk20a *fault_ch,
1542 bool *early_exit, bool *ignore_debugger)
1543{
1544 int ret;
1545 bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->preempt_mode ==
1546 NVGPU_GR_PREEMPTION_MODE_CILP) ;
1547 u32 global_mask = 0, dbgr_control0, global_esr_copy;
1548 u32 offset = proj_gpc_stride_v() * gpc +
1549 proj_tpc_in_gpc_stride_v() * tpc;
1550
1551 *early_exit = false;
1552 *ignore_debugger = false;
1553
1554 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n",
1555 gpc, tpc, global_esr);
1556
1557 if (cilp_enabled && sm_debugger_attached) {
1558 if (global_esr & gr_gpc0_tpc0_sm1_hww_global_esr_bpt_int_pending_f())
1559 gk20a_writel(g, gr_gpc0_tpc0_sm1_hww_global_esr_r() + offset,
1560 gr_gpc0_tpc0_sm1_hww_global_esr_bpt_int_pending_f());
1561
1562 if (global_esr & gr_gpc0_tpc0_sm1_hww_global_esr_single_step_complete_pending_f())
1563 gk20a_writel(g, gr_gpc0_tpc0_sm1_hww_global_esr_r() + offset,
1564 gr_gpc0_tpc0_sm1_hww_global_esr_single_step_complete_pending_f());
1565
1566 global_mask = gr_gpcs_tpcs_sm1_hww_global_esr_multiple_warp_errors_pending_f() |
1567 gr_gpcs_tpcs_sm1_hww_global_esr_bpt_pause_pending_f();
1568
1569 if (warp_esr != 0 || (global_esr & global_mask) != 0) {
1570 *ignore_debugger = true;
1571
1572 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1573 "CILP: starting wait for LOCKED_DOWN on gpc %d tpc %d\n",
1574 gpc, tpc);
1575
1576 if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch)) {
1577 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1578 "CILP: Broadcasting STOP_TRIGGER from gpc %d tpc %d\n",
1579 gpc, tpc);
1580 gk20a_suspend_all_sms(g, global_mask, false);
1581
1582 gk20a_dbg_gpu_clear_broadcast_stop_trigger(fault_ch);
1583 } else {
1584 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1585					"CILP: sending STOP_TRIGGER to gpc %d tpc %d\n",
1586 gpc, tpc);
1587 gk20a_suspend_single_sm(g, gpc, tpc, global_mask, true);
1588 }
1589
1590 /* reset the HWW errors after locking down */
1591 global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm1_hww_global_esr_r() + offset);
1592 gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy);
1593 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1594 "CILP: HWWs cleared for gpc %d tpc %d\n",
1595 gpc, tpc);
1596
1597 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n");
1598 ret = gr_gv11b_set_cilp_preempt_pending(g, fault_ch);
1599 if (ret) {
1600 gk20a_err(dev_from_gk20a(g), "CILP: error while setting CILP preempt pending!\n");
1601 return ret;
1602 }
1603
1604 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm1_dbgr_control0_r() + offset);
1605 if (dbgr_control0 & gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_enable_f()) {
1606 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1607 "CILP: clearing SINGLE_STEP_MODE before resume for gpc %d tpc %d\n",
1608 gpc, tpc);
1609 dbgr_control0 = set_field(dbgr_control0,
1610 gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_m(),
1611 gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_disable_f());
1612 gk20a_writel(g, gr_gpc0_tpc0_sm1_dbgr_control0_r() + offset, dbgr_control0);
1613 }
1614
1615 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
1616 "CILP: resume for gpc %d tpc %d\n",
1617 gpc, tpc);
1618 gk20a_resume_single_sm(g, gpc, tpc);
1619
1620 *ignore_debugger = true;
1621 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: All done on gpc %d, tpc %d\n", gpc, tpc);
1622 }
1623
1624 *early_exit = true;
1625 }
1626 return 0;
1627}
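
gr_gv11b_pre_process_sm_exception() reports back through its two out-parameters rather than the return code: early_exit tells the generic SM handler to stop, and ignore_debugger suppresses the usual debugger notification. The real dispatch goes through gops->gr.pre_process_sm_exception (wired up in gv11b_init_gr() below); a hypothetical caller, trimmed to just the flag handling, might look like this:

/* Hypothetical caller, trimmed to the flag handling; the surrounding
 * types come from the driver and the dispatch details are omitted. */
static int sm_exception_dispatch_sketch(struct gk20a *g, u32 gpc, u32 tpc,
		u32 global_esr, u32 warp_esr, bool dbgr_attached,
		struct channel_gk20a *fault_ch)
{
	bool early_exit = false;
	bool ignore_debugger = false;
	int ret;

	ret = gr_gv11b_pre_process_sm_exception(g, gpc, tpc, global_esr,
			warp_esr, dbgr_attached, fault_ch,
			&early_exit, &ignore_debugger);
	if (ret)
		return ret;

	if (early_exit)
		return 0;	/* the CILP path suspended/resumed the SM itself */

	if (!ignore_debugger) {
		/* normal path: post events to the attached debugger, if any */
	}
	return 0;
}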
1628
1629static int gr_gv11b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
1630{
1631 struct gr_ctx_desc *gr_ctx;
1632 struct channel_gk20a *ch;
1633 int chid;
1634 int ret = -EINVAL;
1635
1636	chid = g->gr.t18x.cilp_preempt_pending_chid;
	if (chid == -1)
		return ret;	/* no CILP preempt outstanding; avoid indexing with -1 */
1637
1638	ch = gk20a_channel_get(gk20a_fifo_channel_from_hw_chid(g, chid));
1639 if (!ch)
1640 return ret;
1641
1642 gr_ctx = ch->ch_ctx.gr_ctx;
1643
1644 if (gr_ctx->t18x.cilp_preempt_pending) {
1645 *__chid = chid;
1646 ret = 0;
1647 }
1648
1649 gk20a_channel_put(ch);
1650
1651 return ret;
1652}
1653
1654static int gr_gv11b_handle_fecs_error(struct gk20a *g,
1655 struct channel_gk20a *__ch,
1656 struct gr_gk20a_isr_data *isr_data)
1657{
1658 u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
1659 struct channel_gk20a *ch;
1660 int chid = -1;
1661 int ret = 0;
1662
1663 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
1664
1665	/*
1666	 * ctxsw_intr1 (bit 1 of HOST_INT_STATUS_CTXSW_INTR, i.e. mask 2 below)
1667	 * indicates that a CILP ctxsw save has finished
1668	 */
1669 if (gr_fecs_intr & gr_fecs_host_int_status_ctxsw_intr_f(2)) {
1670 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1671 "CILP: ctxsw save completed!\n");
1672
1673 /* now clear the interrupt */
1674 gk20a_writel(g, gr_fecs_host_int_clear_r(),
1675 gr_fecs_host_int_clear_ctxsw_intr1_clear_f());
1676
1677 ret = gr_gv11b_get_cilp_preempt_pending_chid(g, &chid);
1678 if (ret)
1679 goto clean_up;
1680
1681 ch = gk20a_channel_get(
1682 gk20a_fifo_channel_from_hw_chid(g, chid));
1683 if (!ch)
1684 goto clean_up;
1685
1687 /* set preempt_pending to false */
1688 ret = gr_gv11b_clear_cilp_preempt_pending(g, ch);
1689 if (ret) {
1690			gk20a_err(dev_from_gk20a(g), "CILP: error while clearing CILP preempt pending!\n");
1691 gk20a_channel_put(ch);
1692 goto clean_up;
1693 }
1694
1695 if (gk20a_gr_sm_debugger_attached(g)) {
1696 if (gk20a_is_channel_marked_as_tsg(ch)) {
1697 struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
1698				struct channel_gk20a *ch_tsg; /* avoid shadowing the __ch parameter */
1699
1700				mutex_lock(&tsg->ch_list_lock);
1701				list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
1702					gk20a_dbg_gpu_post_events(ch_tsg);
1703 }
1704 mutex_unlock(&tsg->ch_list_lock);
1705
1706 gk20a_tsg_event_id_post_event(tsg,
1707 NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE);
1708 } else {
1709 gk20a_dbg_gpu_post_events(ch);
1710
1711 gk20a_channel_event_id_post_event(ch,
1712 NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE);
1713 }
1714 }
1715
1716 gk20a_channel_put(ch);
1717 }
1718
1719clean_up:
1720 /* handle any remaining interrupts */
1721 return gk20a_gr_handle_fecs_error(g, __ch, isr_data);
1722}
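
The _f() helpers generated in the hw_*_gv11b.h headers take pre-shifted field values, which is why the test above passes 2, the mask for bit 1 (ctxsw_intr1), rather than the bit index itself. A standalone restatement, with BIT() as a local stand-in for the kernel macro:

#include <stdio.h>

#define BIT(n)	(1u << (n))	/* local stand-in for the kernel macro */

int main(void)
{
	unsigned int host_int_status = BIT(1);	/* ctxsw_intr1 pending */

	/* same condition as (gr_fecs_intr & ..._ctxsw_intr_f(2)) above */
	if (host_int_status & 2u)
		printf("CILP: ctxsw save completed\n");
	return 0;
}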
1723
1724static u32 gv11b_mask_hww_warp_esr(u32 hww_warp_esr)
1725{
1726 if (!(hww_warp_esr & gr_gpc0_tpc0_sm1_hww_warp_esr_addr_valid_m()))
1727 hww_warp_esr = set_field(hww_warp_esr,
1728 gr_gpc0_tpc0_sm1_hww_warp_esr_addr_error_type_m(),
1729 gr_gpc0_tpc0_sm1_hww_warp_esr_addr_error_type_none_f());
1730
1731 return hww_warp_esr;
1732}
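
gv11b_mask_hww_warp_esr() rewrites only the ADDR_ERROR_TYPE field of the warp ESR when no valid address is reported. Assuming the usual read-modify-write semantics of the driver's set_field() helper, i.e. (val & ~mask) | field, a standalone illustration with an invented field layout:

#include <assert.h>
#include <stdio.h>

/* Assumed semantics of the driver's set_field(): clear the bits selected
 * by mask, then OR in the new (pre-shifted) field value. */
static unsigned int set_field(unsigned int val, unsigned int mask,
			      unsigned int field)
{
	return (val & ~mask) | field;
}

int main(void)
{
	/* Invented layout: pretend ADDR_ERROR_TYPE occupies bits 2..4. */
	const unsigned int type_m = 0x7u << 2;
	const unsigned int type_none_f = 0x0u << 2;
	unsigned int esr = 0xdeadbeefu;	/* arbitrary warp ESR value */

	esr = set_field(esr, type_m, type_none_f);
	assert((esr & type_m) == 0);	/* only the selected field changed */
	printf("masked esr = 0x%08x\n", esr);
	return 0;
}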
27 1733
28 1734void gv11b_init_gr(struct gpu_ops *gops)
29 1735{
30 1736	gp10b_init_gr(gops);
1737 gops->gr.init_fs_state = gr_gv11b_init_fs_state;
1738 gops->gr.is_valid_class = gr_gv11b_is_valid_class;
1739 gops->gr.commit_global_cb_manager = gr_gv11b_commit_global_cb_manager;
1740 gops->gr.commit_global_pagepool = gr_gv11b_commit_global_pagepool;
1741 gops->gr.add_zbc_color = gr_gv11b_add_zbc_color;
1742 gops->gr.add_zbc_depth = gr_gv11b_add_zbc_depth;
1743 gops->gr.pagepool_default_size = gr_gv11b_pagepool_default_size;
1744 gops->gr.calc_global_ctx_buffer_size =
1745 gr_gv11b_calc_global_ctx_buffer_size;
1746 gops->gr.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb;
1747 gops->gr.commit_global_bundle_cb = gr_gv11b_commit_global_bundle_cb;
1748 gops->gr.handle_sw_method = gr_gv11b_handle_sw_method;
1749 gops->gr.cb_size_default = gr_gv11b_cb_size_default;
1750 gops->gr.set_alpha_circular_buffer_size =
1751 gr_gv11b_set_alpha_circular_buffer_size;
1752 gops->gr.set_circular_buffer_size =
1753 gr_gv11b_set_circular_buffer_size;
1754 gops->gr.init_ctx_state = gr_gv11b_init_ctx_state;
1755 gops->gr.alloc_gr_ctx = gr_gv11b_alloc_gr_ctx;
1756 gops->gr.free_gr_ctx = gr_gv11b_free_gr_ctx;
1757 gops->gr.update_ctxsw_preemption_mode =
1758 gr_gv11b_update_ctxsw_preemption_mode;
1759 gops->gr.dump_gr_regs = gr_gv11b_dump_gr_status_regs;
1760 gops->gr.wait_empty = gr_gv11b_wait_empty;
1761 gops->gr.init_cyclestats = gr_gv11b_init_cyclestats;
1762 gops->gr.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask;
1763 gops->gr.get_access_map = gr_gv11b_get_access_map;
1764 gops->gr.handle_sm_exception = gr_gv11b_handle_sm_exception;
1765 gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception;
1766 gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr;
1767 gops->gr.pre_process_sm_exception =
1768 gr_gv11b_pre_process_sm_exception;
1769 gops->gr.handle_fecs_error = gr_gv11b_handle_fecs_error;
31 1770}
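
gv11b_init_gr() follows nvgpu's chip HAL convention: inherit the previous chip's function pointers via gp10b_init_gr() (which in turn builds on gm20b), then override only the entry points that changed on this chip. A hypothetical probe-time usage sketch:

/* Hypothetical probe-time wiring; gpu_ops contents trimmed to the idea. */
static void gr_hal_wiring_sketch(struct gpu_ops *ops)
{
	gv11b_init_gr(ops);	/* installs gp10b defaults, then gv11b overrides */
	/* e.g. ops->gr.handle_fecs_error now points at gr_gv11b_handle_fecs_error */
}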