diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/tu104/gr_tu104.c | 549 |
1 files changed, 0 insertions, 549 deletions
diff --git a/drivers/gpu/nvgpu/tu104/gr_tu104.c b/drivers/gpu/nvgpu/tu104/gr_tu104.c deleted file mode 100644 index fa6995ac..00000000 --- a/drivers/gpu/nvgpu/tu104/gr_tu104.c +++ /dev/null | |||
@@ -1,549 +0,0 @@ | |||
1 | <<<<<<< HEAD (bbef4c gpu: nvgpu: initialize masks for the perfmon counters 3) | ||
2 | ======= | ||
3 | /* | ||
4 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <nvgpu/types.h> | ||
26 | #include <nvgpu/soc.h> | ||
27 | #include <nvgpu/io.h> | ||
28 | #include <nvgpu/utils.h> | ||
29 | #include <nvgpu/gk20a.h> | ||
30 | #include <nvgpu/channel.h> | ||
31 | #include <nvgpu/netlist.h> | ||
32 | |||
33 | #include "gk20a/gr_gk20a.h" | ||
34 | #include "gk20a/gr_pri_gk20a.h" | ||
35 | |||
36 | #include "gp10b/gr_gp10b.h" | ||
37 | |||
38 | #include "gv11b/gr_gv11b.h" | ||
39 | |||
40 | #include "tu104/gr_tu104.h" | ||
41 | |||
42 | #include <nvgpu/hw/tu104/hw_gr_tu104.h> | ||
43 | |||
44 | bool gr_tu104_is_valid_class(struct gk20a *g, u32 class_num) | ||
45 | { | ||
46 | nvgpu_speculation_barrier(); | ||
47 | switch (class_num) { | ||
48 | case TURING_CHANNEL_GPFIFO_A: | ||
49 | case TURING_A: | ||
50 | case TURING_COMPUTE_A: | ||
51 | case TURING_DMA_COPY_A: | ||
52 | return true; | ||
53 | default: | ||
54 | break; | ||
55 | } | ||
56 | |||
57 | return gr_gv11b_is_valid_class(g, class_num); | ||
58 | }; | ||
59 | |||
60 | bool gr_tu104_is_valid_gfx_class(struct gk20a *g, u32 class_num) | ||
61 | { | ||
62 | nvgpu_speculation_barrier(); | ||
63 | switch (class_num) { | ||
64 | case TURING_A: | ||
65 | return true; | ||
66 | default: | ||
67 | break; | ||
68 | } | ||
69 | |||
70 | return gr_gv11b_is_valid_gfx_class(g, class_num); | ||
71 | } | ||
72 | |||
73 | bool gr_tu104_is_valid_compute_class(struct gk20a *g, u32 class_num) | ||
74 | { | ||
75 | nvgpu_speculation_barrier(); | ||
76 | switch (class_num) { | ||
77 | case TURING_COMPUTE_A: | ||
78 | return true; | ||
79 | default: | ||
80 | break; | ||
81 | } | ||
82 | |||
83 | return gr_gv11b_is_valid_compute_class(g, class_num); | ||
84 | } | ||
85 | |||
86 | int gr_tu104_init_sw_bundle64(struct gk20a *g) | ||
87 | { | ||
88 | u32 i; | ||
89 | u32 last_bundle_data_lo = 0; | ||
90 | u32 last_bundle_data_hi = 0; | ||
91 | int err = 0; | ||
92 | struct netlist_av64_list *sw_bundle64_init = | ||
93 | &g->netlist_vars->sw_bundle64_init; | ||
94 | |||
95 | for (i = 0U; i < sw_bundle64_init->count; i++) { | ||
96 | if (i == 0U || | ||
97 | (last_bundle_data_lo != sw_bundle64_init->l[i].value_lo) || | ||
98 | (last_bundle_data_hi != sw_bundle64_init->l[i].value_hi)) { | ||
99 | nvgpu_writel(g, gr_pipe_bundle_data_r(), | ||
100 | sw_bundle64_init->l[i].value_lo); | ||
101 | nvgpu_writel(g, gr_pipe_bundle_data_hi_r(), | ||
102 | sw_bundle64_init->l[i].value_hi); | ||
103 | |||
104 | last_bundle_data_lo = sw_bundle64_init->l[i].value_lo; | ||
105 | last_bundle_data_hi = sw_bundle64_init->l[i].value_hi; | ||
106 | } | ||
107 | |||
108 | nvgpu_writel(g, gr_pipe_bundle_address_r(), | ||
109 | sw_bundle64_init->l[i].addr); | ||
110 | |||
111 | if (gr_pipe_bundle_address_value_v(sw_bundle64_init->l[i].addr) | ||
112 | == GR_GO_IDLE_BUNDLE) { | ||
113 | err = gr_gk20a_wait_idle(g, | ||
114 | gk20a_get_gr_idle_timeout(g), | ||
115 | GR_IDLE_CHECK_DEFAULT); | ||
116 | } else if (nvgpu_platform_is_silicon(g)) { | ||
117 | err = gr_gk20a_wait_fe_idle(g, | ||
118 | gk20a_get_gr_idle_timeout(g), | ||
119 | GR_IDLE_CHECK_DEFAULT); | ||
120 | } | ||
121 | if (err != 0) { | ||
122 | break; | ||
123 | } | ||
124 | } | ||
125 | |||
126 | return err; | ||
127 | } | ||
128 | |||
129 | int gr_tu104_alloc_global_ctx_buffers(struct gk20a *g) | ||
130 | { | ||
131 | int err; | ||
132 | struct gr_gk20a *gr = &g->gr; | ||
133 | u32 rtv_circular_buffer_size; | ||
134 | |||
135 | nvgpu_log_fn(g, " "); | ||
136 | |||
137 | rtv_circular_buffer_size = | ||
138 | (gr_scc_rm_rtv_cb_size_div_256b_default_f() + | ||
139 | gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()) * | ||
140 | gr_scc_bundle_cb_size_div_256b_byte_granularity_v(); | ||
141 | nvgpu_log_info(g, "rtv_circular_buffer_size : %u", | ||
142 | rtv_circular_buffer_size); | ||
143 | |||
144 | err = gk20a_gr_alloc_ctx_buffer(g, | ||
145 | &gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER], | ||
146 | rtv_circular_buffer_size); | ||
147 | if (err != 0) { | ||
148 | return err; | ||
149 | } | ||
150 | |||
151 | err = gr_gk20a_alloc_global_ctx_buffers(g); | ||
152 | if (err != 0) { | ||
153 | goto clean_up; | ||
154 | } | ||
155 | |||
156 | return 0; | ||
157 | |||
158 | clean_up: | ||
159 | nvgpu_err(g, "fail"); | ||
160 | gk20a_gr_destroy_ctx_buffer(g, | ||
161 | &gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER]); | ||
162 | |||
163 | return err; | ||
164 | } | ||
165 | |||
166 | int gr_tu104_map_global_ctx_buffers(struct gk20a *g, struct vm_gk20a *vm, | ||
167 | struct nvgpu_gr_ctx *gr_ctx, bool vpr) | ||
168 | { | ||
169 | int err; | ||
170 | u64 *g_bfr_va; | ||
171 | u64 *g_bfr_size; | ||
172 | int *g_bfr_index; | ||
173 | struct gr_gk20a *gr = &g->gr; | ||
174 | struct nvgpu_mem *mem; | ||
175 | u64 gpu_va; | ||
176 | |||
177 | nvgpu_log_fn(g, " "); | ||
178 | |||
179 | g_bfr_va = gr_ctx->global_ctx_buffer_va; | ||
180 | g_bfr_size = gr_ctx->global_ctx_buffer_size; | ||
181 | g_bfr_index = gr_ctx->global_ctx_buffer_index; | ||
182 | |||
183 | /* RTV circular buffer */ | ||
184 | mem = &gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER].mem; | ||
185 | gpu_va = nvgpu_gmmu_map(vm, mem, mem->size, 0, | ||
186 | gk20a_mem_flag_none, true, mem->aperture); | ||
187 | if (gpu_va == 0ULL) { | ||
188 | return -ENOMEM; | ||
189 | } | ||
190 | |||
191 | g_bfr_va[RTV_CIRCULAR_BUFFER_VA] = gpu_va; | ||
192 | g_bfr_size[RTV_CIRCULAR_BUFFER_VA] = mem->size; | ||
193 | g_bfr_index[RTV_CIRCULAR_BUFFER_VA] = RTV_CIRCULAR_BUFFER; | ||
194 | |||
195 | err = gr_gk20a_map_global_ctx_buffers(g, vm, gr_ctx, vpr); | ||
196 | if (err != 0) { | ||
197 | goto clean_up; | ||
198 | } | ||
199 | |||
200 | return 0; | ||
201 | |||
202 | clean_up: | ||
203 | nvgpu_err(g, "fail"); | ||
204 | nvgpu_gmmu_unmap(vm, mem, gpu_va); | ||
205 | |||
206 | return err; | ||
207 | } | ||
208 | |||
209 | static void gr_tu104_commit_rtv_circular_buffer(struct gk20a *g, | ||
210 | struct nvgpu_gr_ctx *gr_ctx, | ||
211 | u64 addr, u32 size, u32 gfxpAddSize, bool patch) | ||
212 | { | ||
213 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_base_r(), | ||
214 | gr_scc_rm_rtv_cb_base_addr_39_8_f(addr), patch); | ||
215 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_size_r(), | ||
216 | gr_scc_rm_rtv_cb_size_div_256b_f(size), patch); | ||
217 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_rm_rtv_cb_base_r(), | ||
218 | gr_gpcs_gcc_rm_rtv_cb_base_addr_39_8_f(addr), patch); | ||
219 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_gfxp_reserve_r(), | ||
220 | gr_scc_rm_gfxp_reserve_rtv_cb_size_div_256b_f(gfxpAddSize), | ||
221 | patch); | ||
222 | } | ||
223 | |||
224 | int gr_tu104_commit_global_ctx_buffers(struct gk20a *g, | ||
225 | struct nvgpu_gr_ctx *gr_ctx, bool patch) | ||
226 | { | ||
227 | int err; | ||
228 | u64 addr; | ||
229 | u32 size; | ||
230 | u32 gfxpaddsize = 0; | ||
231 | |||
232 | nvgpu_log_fn(g, " "); | ||
233 | |||
234 | err = gr_gk20a_commit_global_ctx_buffers(g, gr_ctx, patch); | ||
235 | if (err != 0) { | ||
236 | return err; | ||
237 | } | ||
238 | |||
239 | if (patch) { | ||
240 | int err; | ||
241 | err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); | ||
242 | if (err != 0) { | ||
243 | return err; | ||
244 | } | ||
245 | } | ||
246 | |||
247 | /* RTV circular buffer */ | ||
248 | addr = gr_ctx->global_ctx_buffer_va[RTV_CIRCULAR_BUFFER_VA] >> | ||
249 | U64(gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f()); | ||
250 | |||
251 | size = (gr_scc_rm_rtv_cb_size_div_256b_default_f() + | ||
252 | gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()); | ||
253 | |||
254 | gr_tu104_commit_rtv_circular_buffer(g, gr_ctx, addr, size, | ||
255 | gfxpaddsize, patch); | ||
256 | |||
257 | if (patch) { | ||
258 | gr_gk20a_ctx_patch_write_end(g, gr_ctx, false); | ||
259 | } | ||
260 | |||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | int gr_tu104_alloc_gfxp_rtv_cb(struct gk20a *g, | ||
265 | struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm) | ||
266 | { | ||
267 | int err; | ||
268 | u32 rtv_cb_size; | ||
269 | |||
270 | nvgpu_log_fn(g, " "); | ||
271 | |||
272 | rtv_cb_size = | ||
273 | (gr_scc_rm_rtv_cb_size_div_256b_default_f() + | ||
274 | gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() + | ||
275 | gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f()) * | ||
276 | gr_scc_rm_rtv_cb_size_div_256b_byte_granularity_v(); | ||
277 | |||
278 | err = gr_gp10b_alloc_buffer(vm, | ||
279 | rtv_cb_size, | ||
280 | &gr_ctx->gfxp_rtvcb_ctxsw_buffer); | ||
281 | |||
282 | return err; | ||
283 | } | ||
284 | |||
285 | void gr_tu104_commit_gfxp_rtv_cb(struct gk20a *g, | ||
286 | struct nvgpu_gr_ctx *gr_ctx, bool patch) | ||
287 | { | ||
288 | u64 addr; | ||
289 | u32 rtv_cb_size; | ||
290 | u32 gfxp_addr_size; | ||
291 | |||
292 | nvgpu_log_fn(g, " "); | ||
293 | |||
294 | rtv_cb_size = | ||
295 | (gr_scc_rm_rtv_cb_size_div_256b_default_f() + | ||
296 | gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() + | ||
297 | gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f()); | ||
298 | gfxp_addr_size = gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f(); | ||
299 | |||
300 | /* GFXP RTV circular buffer */ | ||
301 | addr = (u64)(u64_lo32(gr_ctx->gfxp_rtvcb_ctxsw_buffer.gpu_va) >> | ||
302 | gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f()) | | ||
303 | (u64)(u64_hi32(gr_ctx->gfxp_rtvcb_ctxsw_buffer.gpu_va) << | ||
304 | (32U - gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f())); | ||
305 | |||
306 | |||
307 | gr_tu104_commit_rtv_circular_buffer(g, gr_ctx, addr, | ||
308 | rtv_cb_size, | ||
309 | gfxp_addr_size, | ||
310 | patch); | ||
311 | } | ||
312 | |||
313 | void gr_tu104_bundle_cb_defaults(struct gk20a *g) | ||
314 | { | ||
315 | struct gr_gk20a *gr = &g->gr; | ||
316 | |||
317 | gr->bundle_cb_default_size = | ||
318 | gr_scc_bundle_cb_size_div_256b__prod_v(); | ||
319 | gr->min_gpm_fifo_depth = | ||
320 | gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); | ||
321 | gr->bundle_cb_token_limit = | ||
322 | gr_pd_ab_dist_cfg2_token_limit_init_v(); | ||
323 | } | ||
324 | |||
325 | void gr_tu104_cb_size_default(struct gk20a *g) | ||
326 | { | ||
327 | struct gr_gk20a *gr = &g->gr; | ||
328 | |||
329 | if (gr->attrib_cb_default_size == 0U) { | ||
330 | gr->attrib_cb_default_size = | ||
331 | gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); | ||
332 | } | ||
333 | gr->alpha_cb_default_size = | ||
334 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); | ||
335 | gr->attrib_cb_gfxp_default_size = | ||
336 | gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); | ||
337 | gr->attrib_cb_gfxp_size = | ||
338 | gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); | ||
339 | } | ||
340 | |||
341 | void gr_tu104_free_gr_ctx(struct gk20a *g, | ||
342 | struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) | ||
343 | { | ||
344 | nvgpu_log_fn(g, " "); | ||
345 | |||
346 | if (gr_ctx != NULL) { | ||
347 | nvgpu_dma_unmap_free(vm, &gr_ctx->gfxp_rtvcb_ctxsw_buffer); | ||
348 | } | ||
349 | |||
350 | gr_gk20a_free_gr_ctx(g, vm, gr_ctx); | ||
351 | } | ||
352 | |||
353 | void gr_tu104_enable_gpc_exceptions(struct gk20a *g) | ||
354 | { | ||
355 | struct gr_gk20a *gr = &g->gr; | ||
356 | u32 tpc_mask; | ||
357 | |||
358 | gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), | ||
359 | gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f()); | ||
360 | |||
361 | tpc_mask = | ||
362 | gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->max_tpc_per_gpc_count) - 1); | ||
363 | |||
364 | gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), | ||
365 | (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) | | ||
366 | gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1) | | ||
367 | gr_gpcs_gpccs_gpc_exception_en_gpcmmu_f(1))); | ||
368 | } | ||
369 | |||
370 | int gr_tu104_get_offset_in_gpccs_segment(struct gk20a *g, | ||
371 | enum ctxsw_addr_type addr_type, | ||
372 | u32 num_tpcs, | ||
373 | u32 num_ppcs, | ||
374 | u32 reg_list_ppc_count, | ||
375 | u32 *__offset_in_segment) | ||
376 | { | ||
377 | u32 offset_in_segment = 0; | ||
378 | u32 num_pes_per_gpc = nvgpu_get_litter_value(g, | ||
379 | GPU_LIT_NUM_PES_PER_GPC); | ||
380 | |||
381 | if (addr_type == CTXSW_ADDR_TYPE_TPC) { | ||
382 | /* | ||
383 | * reg = g->netlist_vars->ctxsw_regs.tpc.l; | ||
384 | * offset_in_segment = 0; | ||
385 | */ | ||
386 | } else if (addr_type == CTXSW_ADDR_TYPE_PPC) { | ||
387 | /* | ||
388 | * The ucode stores TPC data before PPC data. | ||
389 | * Advance offset past TPC data to PPC data. | ||
390 | */ | ||
391 | offset_in_segment = | ||
392 | ((g->netlist_vars->ctxsw_regs.tpc.count * | ||
393 | num_tpcs) << 2); | ||
394 | } else if (addr_type == CTXSW_ADDR_TYPE_GPC) { | ||
395 | /* | ||
396 | * The ucode stores TPC/PPC data before GPC data. | ||
397 | * Advance offset past TPC/PPC data to GPC data. | ||
398 | * | ||
399 | * Note 1 PES_PER_GPC case | ||
400 | */ | ||
401 | if (num_pes_per_gpc > 1U) { | ||
402 | offset_in_segment = | ||
403 | (((g->netlist_vars->ctxsw_regs.tpc.count * | ||
404 | num_tpcs) << 2) + | ||
405 | ((reg_list_ppc_count * num_ppcs) << 2)); | ||
406 | } else { | ||
407 | offset_in_segment = | ||
408 | ((g->netlist_vars->ctxsw_regs.tpc.count * | ||
409 | num_tpcs) << 2); | ||
410 | } | ||
411 | } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) || | ||
412 | (addr_type == CTXSW_ADDR_TYPE_ETPC)) { | ||
413 | if (num_pes_per_gpc > 1U) { | ||
414 | offset_in_segment = | ||
415 | ((g->netlist_vars->ctxsw_regs.tpc.count * | ||
416 | num_tpcs) << 2) + | ||
417 | ((reg_list_ppc_count * num_ppcs) << 2) + | ||
418 | (g->netlist_vars->ctxsw_regs.gpc.count << 2); | ||
419 | } else { | ||
420 | offset_in_segment = | ||
421 | ((g->netlist_vars->ctxsw_regs.tpc.count * | ||
422 | num_tpcs) << 2) + | ||
423 | (g->netlist_vars->ctxsw_regs.gpc.count << 2); | ||
424 | } | ||
425 | |||
426 | /* aligned to next 256 byte */ | ||
427 | offset_in_segment = ALIGN(offset_in_segment, 256); | ||
428 | |||
429 | nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg, | ||
430 | "egpc etpc offset_in_segment 0x%#08x", | ||
431 | offset_in_segment); | ||
432 | } else { | ||
433 | nvgpu_log_fn(g, "Unknown address type."); | ||
434 | return -EINVAL; | ||
435 | } | ||
436 | |||
437 | *__offset_in_segment = offset_in_segment; | ||
438 | return 0; | ||
439 | } | ||
440 | |||
441 | static void gr_tu104_set_sm_disp_ctrl(struct gk20a *g, u32 data) | ||
442 | { | ||
443 | u32 reg_val; | ||
444 | |||
445 | nvgpu_log_fn(g, " "); | ||
446 | |||
447 | reg_val = nvgpu_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r()); | ||
448 | |||
449 | if ((data & NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_MASK) | ||
450 | == NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_DISABLE) { | ||
451 | reg_val = set_field(reg_val, | ||
452 | gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_m(), | ||
453 | gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_disable_f() | ||
454 | ); | ||
455 | } else if ((data & NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_MASK) | ||
456 | == NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_ENABLE) { | ||
457 | reg_val = set_field(reg_val, | ||
458 | gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_m(), | ||
459 | gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_enable_f() | ||
460 | ); | ||
461 | } | ||
462 | |||
463 | nvgpu_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), reg_val); | ||
464 | } | ||
465 | |||
466 | int gr_tu104_handle_sw_method(struct gk20a *g, u32 addr, | ||
467 | u32 class_num, u32 offset, u32 data) | ||
468 | { | ||
469 | nvgpu_log_fn(g, " "); | ||
470 | |||
471 | if (class_num == TURING_COMPUTE_A) { | ||
472 | switch (offset << 2) { | ||
473 | case NVC5C0_SET_SHADER_EXCEPTIONS: | ||
474 | gv11b_gr_set_shader_exceptions(g, data); | ||
475 | break; | ||
476 | case NVC5C0_SET_SKEDCHECK: | ||
477 | gr_gv11b_set_skedcheck(g, data); | ||
478 | break; | ||
479 | case NVC5C0_SET_SM_DISP_CTRL: | ||
480 | gr_tu104_set_sm_disp_ctrl(g, data); | ||
481 | break; | ||
482 | case NVC5C0_SET_SHADER_CUT_COLLECTOR: | ||
483 | gr_gv11b_set_shader_cut_collector(g, data); | ||
484 | break; | ||
485 | default: | ||
486 | goto fail; | ||
487 | } | ||
488 | } | ||
489 | |||
490 | if (class_num == TURING_A) { | ||
491 | switch (offset << 2) { | ||
492 | case NVC597_SET_SHADER_EXCEPTIONS: | ||
493 | gv11b_gr_set_shader_exceptions(g, data); | ||
494 | break; | ||
495 | case NVC597_SET_CIRCULAR_BUFFER_SIZE: | ||
496 | g->ops.gr.set_circular_buffer_size(g, data); | ||
497 | break; | ||
498 | case NVC597_SET_ALPHA_CIRCULAR_BUFFER_SIZE: | ||
499 | g->ops.gr.set_alpha_circular_buffer_size(g, data); | ||
500 | break; | ||
501 | case NVC597_SET_GO_IDLE_TIMEOUT: | ||
502 | gr_gv11b_set_go_idle_timeout(g, data); | ||
503 | break; | ||
504 | case NVC097_SET_COALESCE_BUFFER_SIZE: | ||
505 | gr_gv11b_set_coalesce_buffer_size(g, data); | ||
506 | break; | ||
507 | case NVC597_SET_TEX_IN_DBG: | ||
508 | gr_gv11b_set_tex_in_dbg(g, data); | ||
509 | break; | ||
510 | case NVC597_SET_SKEDCHECK: | ||
511 | gr_gv11b_set_skedcheck(g, data); | ||
512 | break; | ||
513 | case NVC597_SET_BES_CROP_DEBUG3: | ||
514 | g->ops.gr.set_bes_crop_debug3(g, data); | ||
515 | break; | ||
516 | case NVC597_SET_BES_CROP_DEBUG4: | ||
517 | g->ops.gr.set_bes_crop_debug4(g, data); | ||
518 | break; | ||
519 | case NVC597_SET_SM_DISP_CTRL: | ||
520 | gr_tu104_set_sm_disp_ctrl(g, data); | ||
521 | break; | ||
522 | case NVC597_SET_SHADER_CUT_COLLECTOR: | ||
523 | gr_gv11b_set_shader_cut_collector(g, data); | ||
524 | break; | ||
525 | default: | ||
526 | goto fail; | ||
527 | } | ||
528 | } | ||
529 | return 0; | ||
530 | |||
531 | fail: | ||
532 | return -EINVAL; | ||
533 | } | ||
534 | |||
/* Intentionally empty: this function performs no work on TU104. */
void gr_tu104_init_sm_dsm_reg_info(void)
{
}
539 | |||
540 | void gr_tu104_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | ||
541 | u32 *num_sm_dsm_perf_ctrl_regs, | ||
542 | u32 **sm_dsm_perf_ctrl_regs, | ||
543 | u32 *ctrl_register_stride) | ||
544 | { | ||
545 | *num_sm_dsm_perf_ctrl_regs = 0; | ||
546 | *sm_dsm_perf_ctrl_regs = NULL; | ||
547 | *ctrl_register_stride = 0; | ||
548 | } | ||
549 | >>>>>>> CHANGE (f0762e gpu: nvgpu: add speculative barrier) | ||