Diffstat (limited to 'include/gk20a/gr_gk20a.h')

-rw-r--r--  include/gk20a/gr_gk20a.h  852
1 file changed, 0 insertions, 852 deletions

diff --git a/include/gk20a/gr_gk20a.h b/include/gk20a/gr_gk20a.h
deleted file mode 100644
index 2cd6a4f..0000000
--- a/include/gk20a/gr_gk20a.h
+++ /dev/null
@@ -1,852 +0,0 @@
/*
 * GK20A Graphics Engine
 *
 * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef GR_GK20A_H
#define GR_GK20A_H

#include <nvgpu/types.h>

#include "gr_ctx_gk20a.h"
#include "mm_gk20a.h"
#include <nvgpu/power_features/pg.h>

#include <nvgpu/comptags.h>
#include <nvgpu/cond.h>

#define GR_IDLE_CHECK_DEFAULT 10 /* usec */
#define GR_IDLE_CHECK_MAX 200 /* usec */
#define GR_FECS_POLL_INTERVAL 5 /* usec */
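/*
 * These presumably bound the poll loops in gr_gk20a_wait_idle() /
 * gr_gk20a_wait_fe_idle() (declared below): polling starts at
 * GR_IDLE_CHECK_DEFAULT and backs off toward GR_IDLE_CHECK_MAX; the exact
 * backoff policy lives in gr_gk20a.c, not in this header.
 */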
39 | |||
40 | #define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFF | ||
41 | #define INVALID_MAX_WAYS 0xFFFFFFFF | ||
42 | |||
43 | #define GK20A_FECS_UCODE_IMAGE "fecs.bin" | ||
44 | #define GK20A_GPCCS_UCODE_IMAGE "gpccs.bin" | ||
45 | |||
46 | #define GK20A_GR_MAX_PES_PER_GPC 3 | ||
47 | |||
48 | #define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */ | ||
49 | |||
50 | /* Flags to be passed to g->ops.gr.alloc_obj_ctx() */ | ||
51 | #define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP (1 << 1) | ||
52 | #define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP (1 << 2) | ||
53 | |||
54 | /* | ||
55 | * allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries | ||
56 | * of address and data pairs | ||
57 | */ | ||
58 | #define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY 2 | ||
59 | #define PATCH_CTX_SLOTS_PER_PAGE \ | ||
60 | (PAGE_SIZE/(PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * sizeof(u32))) | ||
61 | #define PATCH_CTX_ENTRIES_FROM_SIZE(size) (size/sizeof(u32)) | ||
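/*
 * Worked example, assuming a 4 KB PAGE_SIZE: one patch entry is an
 * (address, data) pair, i.e. PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY (2) u32
 * slots = 8 bytes, so PATCH_CTX_SLOTS_PER_PAGE = 4096 / 8 = 512 entries,
 * which is where the "512 entries" in the comment above comes from.
 */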

#define NVGPU_PREEMPTION_MODE_GRAPHICS_WFI (1 << 0)
#define NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP (1 << 1)

#define NVGPU_PREEMPTION_MODE_COMPUTE_WFI (1 << 0)
#define NVGPU_PREEMPTION_MODE_COMPUTE_CTA (1 << 1)
#define NVGPU_PREEMPTION_MODE_COMPUTE_CILP (1 << 2)

#define CTXSW_INTR0 BIT32(0)
#define CTXSW_INTR1 BIT32(1)

#define MAILBOX_VALUE_TIMESTAMP_BUFFER_FULL 0x26

struct tsg_gk20a;
struct channel_gk20a;
struct nvgpu_warpstate;

enum ctxsw_addr_type;

enum /* global_ctx_buffer */ {
        CIRCULAR = 0,
        PAGEPOOL = 1,
        ATTRIBUTE = 2,
        CIRCULAR_VPR = 3,
        PAGEPOOL_VPR = 4,
        ATTRIBUTE_VPR = 5,
        GOLDEN_CTX = 6,
        PRIV_ACCESS_MAP = 7,
        /* #8 is reserved */
        FECS_TRACE_BUFFER = 9,
        NR_GLOBAL_CTX_BUF = 10
};

/* either ATTRIBUTE or ATTRIBUTE_VPR maps to ATTRIBUTE_VA */
enum /* global_ctx_buffer_va */ {
        CIRCULAR_VA = 0,
        PAGEPOOL_VA = 1,
        ATTRIBUTE_VA = 2,
        GOLDEN_CTX_VA = 3,
        PRIV_ACCESS_MAP_VA = 4,
        /* #5 is reserved */
        FECS_TRACE_BUFFER_VA = 6,
        NR_GLOBAL_CTX_BUF_VA = 7
};
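/*
 * The VA table folds the VPR variants onto the non-VPR slots: the comment
 * above confirms ATTRIBUTE/ATTRIBUTE_VPR share ATTRIBUTE_VA, and presumably
 * CIRCULAR/CIRCULAR_VPR and PAGEPOOL/PAGEPOOL_VPR fold the same way, which
 * is why NR_GLOBAL_CTX_BUF_VA (7) is three smaller than
 * NR_GLOBAL_CTX_BUF (10).
 */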

enum {
        WAIT_UCODE_LOOP,
        WAIT_UCODE_TIMEOUT,
        WAIT_UCODE_ERROR,
        WAIT_UCODE_OK
};

enum {
        GR_IS_UCODE_OP_EQUAL,
        GR_IS_UCODE_OP_NOT_EQUAL,
        GR_IS_UCODE_OP_AND,
        GR_IS_UCODE_OP_LESSER,
        GR_IS_UCODE_OP_LESSER_EQUAL,
        GR_IS_UCODE_OP_SKIP
};

enum {
        eUcodeHandshakeInitComplete = 1,
        eUcodeHandshakeMethodFinished
};

enum {
        ELCG_MODE = (1 << 0),
        BLCG_MODE = (1 << 1),
        INVALID_MODE = (1 << 2)
};

enum {
        NVGPU_EVENT_ID_BPT_INT = 0,
        NVGPU_EVENT_ID_BPT_PAUSE,
        NVGPU_EVENT_ID_BLOCKING_SYNC,
        NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED,
        NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE,
        NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN,
        NVGPU_EVENT_ID_MAX,
};

#ifndef GR_GO_IDLE_BUNDLE
#define GR_GO_IDLE_BUNDLE 0x0000e100 /* --V-B */
#endif

struct gr_channel_map_tlb_entry {
        u32 curr_ctx;
        u32 chid;
        u32 tsgid;
};

struct gr_zcull_gk20a {
        u32 aliquot_width;
        u32 aliquot_height;
        u32 aliquot_size;
        u32 total_aliquots;

        u32 width_align_pixels;
        u32 height_align_pixels;
        u32 pixel_squares_by_aliquots;
};

struct gr_zcull_info {
        u32 width_align_pixels;
        u32 height_align_pixels;
        u32 pixel_squares_by_aliquots;
        u32 aliquot_total;
        u32 region_byte_multiplier;
        u32 region_header_size;
        u32 subregion_header_size;
        u32 subregion_width_align_pixels;
        u32 subregion_height_align_pixels;
        u32 subregion_count;
};

#define GK20A_ZBC_COLOR_VALUE_SIZE 4 /* RGBA */

#define GK20A_STARTOF_ZBC_TABLE 1U /* index zero reserved to indicate "not ZBCd" */
#define GK20A_SIZEOF_ZBC_TABLE 16 /* match ltcs_ltss_dstg_zbc_index_address width (4) */
#define GK20A_ZBC_TABLE_SIZE (16 - 1)
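/*
 * Net effect of the three defines above: the hardware table has
 * GK20A_SIZEOF_ZBC_TABLE (16) indices, index 0 is reserved to mean
 * "not ZBCd", so software tracks GK20A_ZBC_TABLE_SIZE (15) usable entries
 * at indices 1..15.
 */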

#define GK20A_ZBC_TYPE_INVALID 0
#define GK20A_ZBC_TYPE_COLOR 1
#define GK20A_ZBC_TYPE_DEPTH 2
#define T19X_ZBC 3

struct zbc_color_table {
        u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
        u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
        u32 format;
        u32 ref_cnt;
};

struct zbc_depth_table {
        u32 depth;
        u32 format;
        u32 ref_cnt;
};

struct zbc_s_table {
        u32 stencil;
        u32 format;
        u32 ref_cnt;
};

struct zbc_entry {
        u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
        u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
        u32 depth;
        u32 type; /* color or depth */
        u32 format;
};

struct zbc_query_params {
        u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
        u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
        u32 depth;
        u32 ref_cnt;
        u32 format;
        u32 type; /* color or depth */
        u32 index_size; /* [out] size, [in] index */
};

struct sm_info {
        u32 gpc_index;
        u32 tpc_index;
        u32 sm_index;
        u32 global_tpc_index;
};

#if defined(CONFIG_GK20A_CYCLE_STATS)
struct gk20a_cs_snapshot_client;
struct gk20a_cs_snapshot;
#endif

struct gr_gk20a_isr_data {
        u32 addr;
        u32 data_lo;
        u32 data_hi;
        u32 curr_ctx;
        struct channel_gk20a *ch;
        u32 offset;
        u32 sub_chan;
        u32 class_num;
};

struct gr_ctx_buffer_desc {
        void (*destroy)(struct gk20a *, struct gr_ctx_buffer_desc *);
        struct nvgpu_mem mem;
        void *priv;
};

struct nvgpu_preemption_modes_rec {
        u32 graphics_preemption_mode_flags; /* supported preemption modes */
        u32 compute_preemption_mode_flags; /* supported preemption modes */

        u32 default_graphics_preempt_mode; /* default mode */
        u32 default_compute_preempt_mode; /* default mode */
};

struct gr_gk20a {
        struct gk20a *g;
        struct {
                bool dynamic;

                u32 buffer_size;
                u32 buffer_total_size;

                bool golden_image_initialized;
                u32 golden_image_size;
                u32 *local_golden_image;

                u32 hwpm_ctxsw_buffer_offset_map_count;
                struct ctxsw_buf_offset_map_entry *hwpm_ctxsw_buffer_offset_map;

                u32 zcull_ctxsw_image_size;

                u32 pm_ctxsw_image_size;

                u32 buffer_header_size;

                u32 priv_access_map_size;

                u32 fecs_trace_buffer_size;

                struct gr_ucode_gk20a ucode;

                struct av_list_gk20a sw_bundle_init;
                struct av_list_gk20a sw_method_init;
                struct aiv_list_gk20a sw_ctx_load;
                struct av_list_gk20a sw_non_ctx_load;
                struct av_list_gk20a sw_veid_bundle_init;
                struct av64_list_gk20a sw_bundle64_init;
                struct {
                        struct aiv_list_gk20a sys;
                        struct aiv_list_gk20a gpc;
                        struct aiv_list_gk20a tpc;
                        struct aiv_list_gk20a zcull_gpc;
                        struct aiv_list_gk20a ppc;
                        struct aiv_list_gk20a pm_sys;
                        struct aiv_list_gk20a pm_gpc;
                        struct aiv_list_gk20a pm_tpc;
                        struct aiv_list_gk20a pm_ppc;
                        struct aiv_list_gk20a perf_sys;
                        struct aiv_list_gk20a perf_gpc;
                        struct aiv_list_gk20a fbp;
                        struct aiv_list_gk20a fbp_router;
                        struct aiv_list_gk20a gpc_router;
                        struct aiv_list_gk20a pm_ltc;
                        struct aiv_list_gk20a pm_fbpa;
                        struct aiv_list_gk20a perf_sys_router;
                        struct aiv_list_gk20a perf_pma;
                        struct aiv_list_gk20a pm_rop;
                        struct aiv_list_gk20a pm_ucgpc;
                        struct aiv_list_gk20a etpc;
                        struct aiv_list_gk20a pm_cau;
                } ctxsw_regs;
                u32 regs_base_index;
                bool valid;

                u32 preempt_image_size;
                bool force_preemption_gfxp;
                bool force_preemption_cilp;
                bool dump_ctxsw_stats_on_channel_close;
        } ctx_vars;

        struct nvgpu_mutex ctx_mutex; /* protect golden ctx init */
        struct nvgpu_mutex fecs_mutex; /* protect fecs method */

#define GR_NETLIST_DYNAMIC -1
#define GR_NETLIST_STATIC_A 'A'
        int netlist;

        struct nvgpu_cond init_wq;
        int initialized;

        u32 num_fbps;

        u32 max_comptag_lines;
        u32 compbit_backing_size;
        u32 comptags_per_cacheline;
        u32 slices_per_ltc;
        u32 cacheline_size;
        u32 gobs_per_comptagline_per_slice;

        u32 max_gpc_count;
        u32 max_fbps_count;
        u32 max_tpc_per_gpc_count;
        u32 max_zcull_per_gpc_count;
        u32 max_tpc_count;

        u32 sys_count;
        u32 gpc_count;
        u32 pe_count_per_gpc;
        u32 ppc_count;
        u32 *gpc_ppc_count;
        u32 tpc_count;
        u32 *gpc_tpc_count;
        u32 *gpc_tpc_mask;
        u32 zcb_count;
        u32 *gpc_zcb_count;
        u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
        u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
        u32 *gpc_skip_mask;

        u32 bundle_cb_default_size;
        u32 min_gpm_fifo_depth;
        u32 bundle_cb_token_limit;
        u32 attrib_cb_default_size;
        u32 attrib_cb_size;
        u32 attrib_cb_gfxp_default_size;
        u32 attrib_cb_gfxp_size;
        u32 alpha_cb_default_size;
        u32 alpha_cb_size;
        u32 timeslice_mode;
        u32 czf_bypass;
        u32 pd_max_batches;
        u32 gfxp_wfi_timeout_count;
        u32 gfxp_wfi_timeout_unit;

        /*
         * Memory size (in MB) to deduct from max_comptag_mem; usually close
         * to the amount of memory the running system itself is using.
         */
        u32 comptag_mem_deduct;

        struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];

        u8 *map_tiles;
        u32 map_tile_count;
        u32 map_row_offset;

        u32 max_comptag_mem; /* max memory size (MB) for comptag */
        struct compbit_store_desc compbit_store;
        struct gk20a_comptag_allocator comp_tags;

        struct gr_zcull_gk20a zcull;

        struct nvgpu_mutex zbc_lock;
        struct zbc_color_table zbc_col_tbl[GK20A_ZBC_TABLE_SIZE];
        struct zbc_depth_table zbc_dep_tbl[GK20A_ZBC_TABLE_SIZE];
        struct zbc_s_table zbc_s_tbl[GK20A_ZBC_TABLE_SIZE];
        s32 max_default_color_index;
        s32 max_default_depth_index;
        s32 max_default_s_index;

        u32 max_used_color_index;
        u32 max_used_depth_index;
        u32 max_used_s_index;

#define GR_CHANNEL_MAP_TLB_SIZE 2 /* must be a power of 2 */
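        /*
         * A power-of-2 size presumably lets the lookup wrap its index with
         * a cheap "index & (GR_CHANNEL_MAP_TLB_SIZE - 1)" mask instead of a
         * modulo; the actual lookup/replacement code lives in gr_gk20a.c.
         */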
        struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
        u32 channel_tlb_flush_index;
        struct nvgpu_spinlock ch_tlb_lock;

        void (*remove_support)(struct gr_gk20a *gr);
        bool sw_ready;
        bool skip_ucode_init;

        struct nvgpu_preemption_modes_rec preemption_mode_rec;

        u32 fecs_feature_override_ecc_val;

        int cilp_preempt_pending_chid;

        u32 fbp_en_mask;
        u32 *fbp_rop_l2_en_mask;
        u32 no_of_sm;
        struct sm_info *sm_to_cluster;

#if defined(CONFIG_GK20A_CYCLE_STATS)
        struct nvgpu_mutex cs_lock;
        struct gk20a_cs_snapshot *cs_data;
#endif
        u32 max_css_buffer_size;
};

void gk20a_fecs_dump_falcon_stats(struct gk20a *g);
void gk20a_gpccs_dump_falcon_stats(struct gk20a *g);

/* contexts associated with a TSG */
struct nvgpu_gr_ctx {
        struct nvgpu_mem mem;

        u32 graphics_preempt_mode;
        u32 compute_preempt_mode;

        struct nvgpu_mem preempt_ctxsw_buffer;
        struct nvgpu_mem spill_ctxsw_buffer;
        struct nvgpu_mem betacb_ctxsw_buffer;
        struct nvgpu_mem pagepool_ctxsw_buffer;
        u32 ctx_id;
        bool ctx_id_valid;
        bool cilp_preempt_pending;
        bool boosted_ctx;
        bool golden_img_loaded;

#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
        u64 virt_ctx;
#endif

        struct patch_desc patch_ctx;
        struct zcull_ctx_desc zcull_ctx;
        struct pm_ctx_desc pm_ctx;
        u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
        u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
        int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA];
        bool global_ctx_buffer_mapped;

        u32 tsgid;
};

struct gk20a_ctxsw_ucode_segment {
        u32 offset;
        u32 size;
};

struct gk20a_ctxsw_ucode_segments {
        u32 boot_entry;
        u32 boot_imem_offset;
        u32 boot_signature;
        struct gk20a_ctxsw_ucode_segment boot;
        struct gk20a_ctxsw_ucode_segment code;
        struct gk20a_ctxsw_ucode_segment data;
};

/*
 * Checksums over the ucode files, taken as sequences of u32 and checked
 * against the boot_signature field in the structure above.
 */

/*
 * T18X FECS remains the same as T21X, so
 * FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED is used for T18X.
 */
#define FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED 0x68edab34
#define FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE 0x9121ab5c
#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED 0x9125ab5c
#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED 0x8a621f78
#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED 0x67e5344b
#define FALCON_UCODE_SIG_T12X_FECS_OLDER 0x56da09f

#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED 0x3d3d65e2
#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED 0x303465d5
#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED 0x3fdd33d3
#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER 0x53d7877

#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED 0x93671b7d
#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2 0x4d6cbc10

#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED 0x393161da

struct gk20a_ctxsw_ucode_info {
        u64 *p_va;
        struct nvgpu_mem inst_blk_desc;
        struct nvgpu_mem surface_desc;
        struct gk20a_ctxsw_ucode_segments fecs;
        struct gk20a_ctxsw_ucode_segments gpccs;
};

struct gk20a_ctxsw_bootloader_desc {
        u32 start_offset;
        u32 size;
        u32 imem_offset;
        u32 entry_point;
};

struct fecs_method_op_gk20a {
        struct {
                u32 addr;
                u32 data;
        } method;

        struct {
                u32 id;
                u32 data;
                u32 clr;
                u32 *ret;
                u32 ok;
                u32 fail;
        } mailbox;

        struct {
                u32 ok;
                u32 fail;
        } cond;

};
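/*
 * Sketch of a caller, with hypothetical method/mailbox values: issue a
 * FECS method and poll mailbox 0 until it reads back 0x1 (ok) or 0x2
 * (fail), using the GR_IS_UCODE_OP_* compare ops defined above:
 *
 *      struct fecs_method_op_gk20a op = {
 *              .method  = { .addr = fecs_method, .data = 0U },
 *              .mailbox = { .id = 0U, .data = 0U, .clr = ~0U, .ret = NULL,
 *                           .ok = 0x1U, .fail = 0x2U },
 *              .cond    = { .ok = GR_IS_UCODE_OP_EQUAL,
 *                           .fail = GR_IS_UCODE_OP_EQUAL },
 *      };
 *      err = gr_gk20a_submit_fecs_method_op(g, op, true);
 */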

struct nvgpu_warpstate {
        u64 valid_warps[2];
        u64 trapped_warps[2];
        u64 paused_warps[2];
};

struct gpu_ops;
int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
        struct channel_gk20a *c);
void gk20a_init_gr(struct gk20a *g);
int gk20a_init_gr_support(struct gk20a *g);
int gk20a_enable_gr_hw(struct gk20a *g);
int gk20a_gr_reset(struct gk20a *g);
void gk20a_gr_wait_initialized(struct gk20a *g);

int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a);

int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags);

int gk20a_gr_isr(struct gk20a *g);
u32 gk20a_gr_nonstall_isr(struct gk20a *g);

/* zcull */
u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr);
int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
        struct channel_gk20a *c, u64 zcull_va, u32 mode);
int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
        struct gr_zcull_info *zcull_params);
void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
        u32 *zcull_map_tiles);
/* zbc */
int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
        struct zbc_entry *zbc_val);
int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
        struct zbc_query_params *query_params);
int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
        struct zbc_entry *zbc_val);
int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr);

/* pmu */
int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size);
int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g,
        struct nvgpu_mem *inst_block);
int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va);

void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config);

/* sm */
bool gk20a_gr_sm_debugger_attached(struct gk20a *g);
u32 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g);

#define gr_gk20a_elpg_protected_call(g, func) \
({ \
        int err = 0; \
        if (g->support_pmu) { \
                err = nvgpu_pg_elpg_disable(g); \
                if (err != 0) { \
                        (void)nvgpu_pg_elpg_enable(g); \
                } \
        } \
        if (err == 0) { \
                err = func; \
                if (g->support_pmu) { \
                        (void)nvgpu_pg_elpg_enable(g); \
                } \
        } \
        err; \
})
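/*
 * Example use (gr_gk20a_init_ctx_state() here is just an illustrative
 * callee, declared further below): run a FECS-dependent call with ELPG
 * disabled around it and re-enabled afterwards:
 *
 *      err = gr_gk20a_elpg_protected_call(g, gr_gk20a_init_ctx_state(g));
 */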

int gk20a_gr_suspend(struct gk20a *g);

struct nvgpu_dbg_reg_op;
int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
        struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
        u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
        bool *is_curr_ctx);
int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
        struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
        u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
        bool ch_is_curr_ctx);
int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
        u32 addr,
        u32 max_offsets,
        u32 *offsets, u32 *offset_addrs,
        u32 *num_offsets,
        bool is_quad, u32 quad);
int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
        u32 addr,
        u32 max_offsets,
        u32 *offsets, u32 *offset_addrs,
        u32 *num_offsets);
int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
        struct channel_gk20a *c,
        bool enable_smpc_ctxsw);
int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
        struct channel_gk20a *c,
        u64 gpu_va,
        u32 mode);

struct nvgpu_gr_ctx;
void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx,
        u32 addr, u32 data, bool patch);
int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
        struct nvgpu_gr_ctx *ch_ctx,
        bool update_patch_count);
void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
        struct nvgpu_gr_ctx *ch_ctx,
        bool update_patch_count);
void gr_gk20a_commit_global_pagepool(struct gk20a *g,
        struct nvgpu_gr_ctx *ch_ctx,
        u64 addr, u32 size, bool patch);
void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
int gr_gk20a_init_fs_state(struct gk20a *g);
int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
int gr_gk20a_init_ctxsw_ucode(struct gk20a *g);
int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g);
void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
        struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
        struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);


void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c);
int gr_gk20a_disable_ctxsw(struct gk20a *g);
int gr_gk20a_enable_ctxsw(struct gk20a *g);
void gk20a_gr_resume_single_sm(struct gk20a *g,
        u32 gpc, u32 tpc, u32 sm);
void gk20a_gr_resume_all_sms(struct gk20a *g);
void gk20a_gr_suspend_single_sm(struct gk20a *g,
        u32 gpc, u32 tpc, u32 sm,
        u32 global_esr_mask, bool check_errors);
void gk20a_gr_suspend_all_sms(struct gk20a *g,
        u32 global_esr_mask, bool check_errors);
u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index);
int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
        struct channel_gk20a *ch, u64 sms, bool enable);
bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
        struct zbc_entry *color_val, u32 index);
int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
        struct zbc_entry *depth_val, u32 index);
int _gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
        struct zbc_entry *zbc_val);
void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries);
int gr_gk20a_wait_idle(struct gk20a *g, unsigned long duration_ms,
        u32 expect_delay);
int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
        bool *post_event, struct channel_gk20a *fault_ch,
        u32 *hww_global_esr);
int gr_gk20a_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
        bool *post_event);
int gr_gk20a_init_ctx_state(struct gk20a *g);
int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
        struct fecs_method_op_gk20a op,
        bool sleepduringwait);
int gr_gk20a_submit_fecs_method_op_locked(struct gk20a *g,
        struct fecs_method_op_gk20a op,
        bool sleepduringwait);
int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
        struct fecs_method_op_gk20a op);
int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
        struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
        u32 class, u32 padding);
void gr_gk20a_free_gr_ctx(struct gk20a *g,
        struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
int gr_gk20a_halt_pipe(struct gk20a *g);

#if defined(CONFIG_GK20A_CYCLE_STATS)
int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */
        u32 perfmon_id_count,  /* in - number of perfmons */
        u32 *perfmon_id_start, /* out - index of first pm */
        /* in/out - pointer to client data used later */
        struct gk20a_cs_snapshot_client *css_client);

int gr_gk20a_css_detach(struct channel_gk20a *ch,
        struct gk20a_cs_snapshot_client *css_client);
int gr_gk20a_css_flush(struct channel_gk20a *ch,
        struct gk20a_cs_snapshot_client *css_client);

void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g);

#else
/* stub cleanup function when cyclestats snapshots are disabled */
static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
{
        (void)g;
}
#endif

void gr_gk20a_fecs_host_int_enable(struct gk20a *g);
int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
        struct gr_gk20a_isr_data *isr_data);
int gk20a_gr_lock_down_sm(struct gk20a *g,
        u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
        bool check_errors);
int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
        u32 global_esr_mask, bool check_errors);
int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
        u32 *mailbox_ret, u32 opc_success,
        u32 mailbox_ok, u32 opc_fail,
        u32 mailbox_fail, bool sleepduringwait);

int gr_gk20a_get_ctx_id(struct gk20a *g,
        struct channel_gk20a *c,
        u32 *ctx_id);

u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);

int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long duration_ms,
        u32 expect_delay);

struct dbg_session_gk20a;

bool gr_gk20a_suspend_context(struct channel_gk20a *ch);
bool gr_gk20a_resume_context(struct channel_gk20a *ch);
int gr_gk20a_suspend_contexts(struct gk20a *g,
        struct dbg_session_gk20a *dbg_s,
        int *ctx_resident_ch_fd);
int gr_gk20a_resume_contexts(struct gk20a *g,
        struct dbg_session_gk20a *dbg_s,
        int *ctx_resident_ch_fd);
void gk20a_gr_enable_gpc_exceptions(struct gk20a *g);
void gk20a_gr_enable_exceptions(struct gk20a *g);
int gr_gk20a_trigger_suspend(struct gk20a *g);
int gr_gk20a_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state);
int gr_gk20a_resume_from_pause(struct gk20a *g);
int gr_gk20a_clear_sm_errors(struct gk20a *g);
u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);

int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c);

int gr_gk20a_init_sm_id_table(struct gk20a *g);

int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);

void gr_gk20a_write_zcull_ptr(struct gk20a *g,
        struct nvgpu_mem *mem, u64 gpu_va);

void gr_gk20a_write_pm_ptr(struct gk20a *g,
        struct nvgpu_mem *mem, u64 gpu_va);

u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc);
u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc);
void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
        u32 *esr_sm_sel);
void gk20a_gr_init_ovr_sm_dsm_perf(void);
void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
        u32 **ovr_perf_regs);
void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
        struct nvgpu_mem *mem);
u32 gr_gk20a_get_patch_slots(struct gk20a *g);
int gk20a_gr_handle_notify_pending(struct gk20a *g,
        struct gr_gk20a_isr_data *isr_data);

int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
        struct channel_gk20a *c);
int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
        struct channel_gk20a *c, bool patch);

int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
        struct channel_gk20a *c);
u32 gk20a_init_sw_bundle(struct gk20a *g);
int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type);
int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
        struct gr_gk20a_isr_data *isr_data);
int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
        struct ctxsw_buf_offset_map_entry *map,
        struct aiv_list_gk20a *regs,
        u32 *count, u32 *offset,
        u32 max_cnt, u32 base,
        u32 num_fbpas, u32 stride, u32 mask);
int gr_gk20a_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map,
        struct aiv_list_gk20a *regs,
        u32 *count, u32 *offset,
        u32 max_cnt, u32 base, u32 mask);
int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
        enum ctxsw_addr_type *addr_type,
        u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
        u32 *broadcast_flags);
int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
        u32 gpc_num,
        u32 *priv_addr_table, u32 *t);
int gr_gk20a_create_priv_addr_table(struct gk20a *g,
        u32 addr,
        u32 *priv_addr_table,
        u32 *num_registers);
void gr_gk20a_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
        u32 num_fbpas,
        u32 *priv_addr_table, u32 *t);
int gr_gk20a_get_offset_in_gpccs_segment(struct gk20a *g,
        enum ctxsw_addr_type addr_type, u32 num_tpcs, u32 num_ppcs,
        u32 reg_list_ppc_count, u32 *__offset_in_segment);

void gk20a_gr_destroy_ctx_buffer(struct gk20a *g,
        struct gr_ctx_buffer_desc *desc);
int gk20a_gr_alloc_ctx_buffer(struct gk20a *g,
        struct gr_ctx_buffer_desc *desc, size_t size);
void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr);
int gr_gk20a_set_fecs_watchdog_timeout(struct gk20a *g);
#endif /* GR_GK20A_H */