path: root/include/gk20a/gr_gk20a.h
author    Joshua Bakita <bakitajoshua@gmail.com> 2023-06-28 18:24:25 -0400
committer Joshua Bakita <bakitajoshua@gmail.com> 2023-06-28 18:24:25 -0400
commit    01e6fac4d61fdd7fff5433942ec93fc2ea1e4df1 (patch)
tree      4ef34501728a087be24f4ba0af90f91486bf780b /include/gk20a/gr_gk20a.h
parent    306a03d18b305e4e573be3b2931978fa10679eb9 (diff)
Include nvgpu headers
These are needed to build on NVIDIA's Jetson boards for the time being. Only a couple structs are required, so it should be fairly easy to remove this dependency at some point in the future.
Diffstat (limited to 'include/gk20a/gr_gk20a.h')
-rw-r--r-- include/gk20a/gr_gk20a.h | 851
1 file changed, 851 insertions(+), 0 deletions(-)
diff --git a/include/gk20a/gr_gk20a.h b/include/gk20a/gr_gk20a.h
new file mode 100644
index 0000000..08b81e8
--- /dev/null
+++ b/include/gk20a/gr_gk20a.h
@@ -0,0 +1,851 @@
/*
 * GK20A Graphics Engine
 *
 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef GR_GK20A_H
#define GR_GK20A_H

#include <nvgpu/types.h>

#include "gr_ctx_gk20a.h"
#include "mm_gk20a.h"
#include <nvgpu/power_features/pg.h>

#include <nvgpu/comptags.h>
#include <nvgpu/cond.h>

#define GR_IDLE_CHECK_DEFAULT 10 /* usec */
#define GR_IDLE_CHECK_MAX 200 /* usec */
#define GR_FECS_POLL_INTERVAL 5 /* usec */

#define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFF
#define INVALID_MAX_WAYS 0xFFFFFFFF

#define GK20A_FECS_UCODE_IMAGE "fecs.bin"
#define GK20A_GPCCS_UCODE_IMAGE "gpccs.bin"

#define GK20A_GR_MAX_PES_PER_GPC 3

#define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */

/* Flags to be passed to g->ops.gr.alloc_obj_ctx() */
#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP (1 << 1)
#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP (1 << 2)
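
/*
 * Illustrative sketch (an assumption, not part of the original header):
 * requesting a graphics-preemptible (GFXP) object context at allocation
 * time using the flags above. "ch" and "class_num" are hypothetical; the
 * class number would be a GPU class understood by this chip.
 *
 *   err = gk20a_alloc_obj_ctx(ch, class_num,
 *                             NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP);
 */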

/*
 * Allocate a minimum of one page (4 KB) worth of patch space; this holds
 * 512 entries of address/data pairs.
 */
#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY 2
#define PATCH_CTX_SLOTS_PER_PAGE \
        (PAGE_SIZE/(PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * sizeof(u32)))
#define PATCH_CTX_ENTRIES_FROM_SIZE(size) (size/sizeof(u32))
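
/*
 * Worked example (illustrative): with PAGE_SIZE = 4096 and sizeof(u32) = 4,
 * PATCH_CTX_SLOTS_PER_PAGE = 4096 / (2 * 4) = 512, i.e. 512 address/data
 * pairs per page, matching the comment above. Likewise, a buffer of "size"
 * bytes yields PATCH_CTX_ENTRIES_FROM_SIZE(size) = size / 4 u32 slots.
 */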

#define NVGPU_PREEMPTION_MODE_GRAPHICS_WFI (1 << 0)
#define NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP (1 << 1)

#define NVGPU_PREEMPTION_MODE_COMPUTE_WFI (1 << 0)
#define NVGPU_PREEMPTION_MODE_COMPUTE_CTA (1 << 1)
#define NVGPU_PREEMPTION_MODE_COMPUTE_CILP (1 << 2)

#define CTXSW_INTR0 BIT32(0)
#define CTXSW_INTR1 BIT32(1)

#define MAILBOX_VALUE_TIMESTAMP_BUFFER_FULL 0x26

struct tsg_gk20a;
struct channel_gk20a;
struct nvgpu_warpstate;

enum ctxsw_addr_type;

enum /* global_ctx_buffer */ {
        CIRCULAR = 0,
        PAGEPOOL = 1,
        ATTRIBUTE = 2,
        CIRCULAR_VPR = 3,
        PAGEPOOL_VPR = 4,
        ATTRIBUTE_VPR = 5,
        GOLDEN_CTX = 6,
        PRIV_ACCESS_MAP = 7,
        /* #8 is reserved */
        FECS_TRACE_BUFFER = 9,
        NR_GLOBAL_CTX_BUF = 10
};

/* either ATTRIBUTE or ATTRIBUTE_VPR maps to ATTRIBUTE_VA */
enum /* global_ctx_buffer_va */ {
        CIRCULAR_VA = 0,
        PAGEPOOL_VA = 1,
        ATTRIBUTE_VA = 2,
        GOLDEN_CTX_VA = 3,
        PRIV_ACCESS_MAP_VA = 4,
        /* #5 is reserved */
        FECS_TRACE_BUFFER_VA = 6,
        NR_GLOBAL_CTX_BUF_VA = 7
};
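
/*
 * Illustrative sketch (an assumption, not code from this header): how the
 * two enums above are expected to relate. The VPR and non-VPR variants of a
 * buffer share one virtual-address slot, per the comment above:
 *
 *   static inline int global_ctx_buf_to_va(int buf)
 *   {
 *           switch (buf) {
 *           case CIRCULAR:
 *           case CIRCULAR_VPR:      return CIRCULAR_VA;
 *           case PAGEPOOL:
 *           case PAGEPOOL_VPR:      return PAGEPOOL_VA;
 *           case ATTRIBUTE:
 *           case ATTRIBUTE_VPR:     return ATTRIBUTE_VA;
 *           case GOLDEN_CTX:        return GOLDEN_CTX_VA;
 *           case PRIV_ACCESS_MAP:   return PRIV_ACCESS_MAP_VA;
 *           case FECS_TRACE_BUFFER: return FECS_TRACE_BUFFER_VA;
 *           default:                return -1;
 *           }
 *   }
 */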

enum {
        WAIT_UCODE_LOOP,
        WAIT_UCODE_TIMEOUT,
        WAIT_UCODE_ERROR,
        WAIT_UCODE_OK
};

enum {
        GR_IS_UCODE_OP_EQUAL,
        GR_IS_UCODE_OP_NOT_EQUAL,
        GR_IS_UCODE_OP_AND,
        GR_IS_UCODE_OP_LESSER,
        GR_IS_UCODE_OP_LESSER_EQUAL,
        GR_IS_UCODE_OP_SKIP
};

enum {
        eUcodeHandshakeInitComplete = 1,
        eUcodeHandshakeMethodFinished
};

enum {
        ELCG_MODE = (1 << 0),
        BLCG_MODE = (1 << 1),
        INVALID_MODE = (1 << 2)
};

enum {
        NVGPU_EVENT_ID_BPT_INT = 0,
        NVGPU_EVENT_ID_BPT_PAUSE,
        NVGPU_EVENT_ID_BLOCKING_SYNC,
        NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED,
        NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE,
        NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN,
        NVGPU_EVENT_ID_MAX,
};

#ifndef GR_GO_IDLE_BUNDLE
#define GR_GO_IDLE_BUNDLE 0x0000e100 /* --V-B */
#endif

struct gr_channel_map_tlb_entry {
        u32 curr_ctx;
        u32 chid;
        u32 tsgid;
};

struct gr_zcull_gk20a {
        u32 aliquot_width;
        u32 aliquot_height;
        u32 aliquot_size;
        u32 total_aliquots;

        u32 width_align_pixels;
        u32 height_align_pixels;
        u32 pixel_squares_by_aliquots;
};

struct gr_zcull_info {
        u32 width_align_pixels;
        u32 height_align_pixels;
        u32 pixel_squares_by_aliquots;
        u32 aliquot_total;
        u32 region_byte_multiplier;
        u32 region_header_size;
        u32 subregion_header_size;
        u32 subregion_width_align_pixels;
        u32 subregion_height_align_pixels;
        u32 subregion_count;
};

#define GK20A_ZBC_COLOR_VALUE_SIZE 4 /* RGBA */

#define GK20A_STARTOF_ZBC_TABLE 1U /* index zero reserved to indicate "not ZBCd" */
#define GK20A_SIZEOF_ZBC_TABLE 16 /* match ltcs_ltss_dstg_zbc_index_address width (4) */
#define GK20A_ZBC_TABLE_SIZE (16 - 1)
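
/*
 * Worked example (illustrative): GK20A_SIZEOF_ZBC_TABLE covers 16 hardware
 * indices, and index 0 is reserved to mean "not ZBCd", so the driver-side
 * tables below hold GK20A_ZBC_TABLE_SIZE = 16 - 1 = 15 entries, spanning
 * hardware indices GK20A_STARTOF_ZBC_TABLE (1) through 15.
 */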

#define GK20A_ZBC_TYPE_INVALID 0
#define GK20A_ZBC_TYPE_COLOR 1
#define GK20A_ZBC_TYPE_DEPTH 2
#define T19X_ZBC 3

struct zbc_color_table {
        u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
        u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
        u32 format;
        u32 ref_cnt;
};

struct zbc_depth_table {
        u32 depth;
        u32 format;
        u32 ref_cnt;
};

struct zbc_s_table {
        u32 stencil;
        u32 format;
        u32 ref_cnt;
};

struct zbc_entry {
        u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
        u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
        u32 depth;
        u32 type; /* color or depth */
        u32 format;
};

struct zbc_query_params {
        u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
        u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
        u32 depth;
        u32 ref_cnt;
        u32 format;
        u32 type; /* color or depth */
        u32 index_size; /* [out] size, [in] index */
};
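
/*
 * Illustrative sketch (an assumption based on the field comments above):
 * querying one ZBC color entry with gr_gk20a_query_zbc(), declared later in
 * this header. index_size carries the table index in and a size back out.
 *
 *   struct zbc_query_params q = {0};
 *   q.type = GK20A_ZBC_TYPE_COLOR;
 *   q.index_size = 1;                     // [in] table index to query
 *   err = gr_gk20a_query_zbc(g, gr, &q);  // fills color_ds/color_l2/format
 */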

struct sm_info {
        u32 gpc_index;
        u32 tpc_index;
        u32 sm_index;
        u32 global_tpc_index;
};

#if defined(CONFIG_GK20A_CYCLE_STATS)
struct gk20a_cs_snapshot_client;
struct gk20a_cs_snapshot;
#endif

struct gr_gk20a_isr_data {
        u32 addr;
        u32 data_lo;
        u32 data_hi;
        u32 curr_ctx;
        struct channel_gk20a *ch;
        u32 offset;
        u32 sub_chan;
        u32 class_num;
};

struct gr_ctx_buffer_desc {
        void (*destroy)(struct gk20a *, struct gr_ctx_buffer_desc *);
        struct nvgpu_mem mem;
        void *priv;
};

struct nvgpu_preemption_modes_rec {
        u32 graphics_preemption_mode_flags; /* supported preemption modes */
        u32 compute_preemption_mode_flags; /* supported preemption modes */

        u32 default_graphics_preempt_mode; /* default mode */
        u32 default_compute_preempt_mode; /* default mode */
};

struct gr_gk20a {
        struct gk20a *g;
        struct {
                bool dynamic;

                u32 buffer_size;
                u32 buffer_total_size;

                bool golden_image_initialized;
                u32 golden_image_size;
                u32 *local_golden_image;

                u32 hwpm_ctxsw_buffer_offset_map_count;
                struct ctxsw_buf_offset_map_entry *hwpm_ctxsw_buffer_offset_map;

                u32 zcull_ctxsw_image_size;

                u32 pm_ctxsw_image_size;

                u32 buffer_header_size;

                u32 priv_access_map_size;

                u32 fecs_trace_buffer_size;

                struct gr_ucode_gk20a ucode;

                struct av_list_gk20a sw_bundle_init;
                struct av_list_gk20a sw_method_init;
                struct aiv_list_gk20a sw_ctx_load;
                struct av_list_gk20a sw_non_ctx_load;
                struct av_list_gk20a sw_veid_bundle_init;
                struct av64_list_gk20a sw_bundle64_init;
                struct {
                        struct aiv_list_gk20a sys;
                        struct aiv_list_gk20a gpc;
                        struct aiv_list_gk20a tpc;
                        struct aiv_list_gk20a zcull_gpc;
                        struct aiv_list_gk20a ppc;
                        struct aiv_list_gk20a pm_sys;
                        struct aiv_list_gk20a pm_gpc;
                        struct aiv_list_gk20a pm_tpc;
                        struct aiv_list_gk20a pm_ppc;
                        struct aiv_list_gk20a perf_sys;
                        struct aiv_list_gk20a perf_gpc;
                        struct aiv_list_gk20a fbp;
                        struct aiv_list_gk20a fbp_router;
                        struct aiv_list_gk20a gpc_router;
                        struct aiv_list_gk20a pm_ltc;
                        struct aiv_list_gk20a pm_fbpa;
                        struct aiv_list_gk20a perf_sys_router;
                        struct aiv_list_gk20a perf_pma;
                        struct aiv_list_gk20a pm_rop;
                        struct aiv_list_gk20a pm_ucgpc;
                        struct aiv_list_gk20a etpc;
                        struct aiv_list_gk20a pm_cau;
                } ctxsw_regs;
                u32 regs_base_index;
                bool valid;

                u32 preempt_image_size;
                bool force_preemption_gfxp;
                bool force_preemption_cilp;
                bool dump_ctxsw_stats_on_channel_close;
        } ctx_vars;

        struct nvgpu_mutex ctx_mutex; /* protect golden ctx init */
        struct nvgpu_mutex fecs_mutex; /* protect fecs method */

#define GR_NETLIST_DYNAMIC -1
#define GR_NETLIST_STATIC_A 'A'
        int netlist;

        struct nvgpu_cond init_wq;
        int initialized;

        u32 num_fbps;

        u32 max_comptag_lines;
        u32 compbit_backing_size;
        u32 comptags_per_cacheline;
        u32 slices_per_ltc;
        u32 cacheline_size;
        u32 gobs_per_comptagline_per_slice;

        u32 max_gpc_count;
        u32 max_fbps_count;
        u32 max_tpc_per_gpc_count;
        u32 max_zcull_per_gpc_count;
        u32 max_tpc_count;

        u32 sys_count;
        u32 gpc_count;
        u32 pe_count_per_gpc;
        u32 ppc_count;
        u32 *gpc_ppc_count;
        u32 tpc_count;
        u32 *gpc_tpc_count;
        u32 *gpc_tpc_mask;
        u32 zcb_count;
        u32 *gpc_zcb_count;
        u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
        u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
        u32 *gpc_skip_mask;

        u32 bundle_cb_default_size;
        u32 min_gpm_fifo_depth;
        u32 bundle_cb_token_limit;
        u32 attrib_cb_default_size;
        u32 attrib_cb_size;
        u32 attrib_cb_gfxp_default_size;
        u32 attrib_cb_gfxp_size;
        u32 alpha_cb_default_size;
        u32 alpha_cb_size;
        u32 timeslice_mode;
        u32 czf_bypass;
        u32 pd_max_batches;
        u32 gfxp_wfi_timeout_count;
        u32 gfxp_wfi_timeout_unit;

        /*
         * The amount of memory (in MB) deducted from max_comptag_mem,
         * usually close to the memory the running system consumes.
         */
        u32 comptag_mem_deduct;

        struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];

        u8 *map_tiles;
        u32 map_tile_count;
        u32 map_row_offset;

        u32 max_comptag_mem; /* max memory size (MB) for comptag */
        struct compbit_store_desc compbit_store;
        struct gk20a_comptag_allocator comp_tags;

        struct gr_zcull_gk20a zcull;

        struct nvgpu_mutex zbc_lock;
        struct zbc_color_table zbc_col_tbl[GK20A_ZBC_TABLE_SIZE];
        struct zbc_depth_table zbc_dep_tbl[GK20A_ZBC_TABLE_SIZE];
        struct zbc_s_table zbc_s_tbl[GK20A_ZBC_TABLE_SIZE];
        s32 max_default_color_index;
        s32 max_default_depth_index;
        s32 max_default_s_index;

        u32 max_used_color_index;
        u32 max_used_depth_index;
        u32 max_used_s_index;

#define GR_CHANNEL_MAP_TLB_SIZE 2 /* must be a power of 2 */
        struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
        u32 channel_tlb_flush_index;
        struct nvgpu_spinlock ch_tlb_lock;

        void (*remove_support)(struct gr_gk20a *gr);
        bool sw_ready;
        bool skip_ucode_init;

        struct nvgpu_preemption_modes_rec preemption_mode_rec;

        u32 fecs_feature_override_ecc_val;

        int cilp_preempt_pending_chid;

        u32 fbp_en_mask;
        u32 *fbp_rop_l2_en_mask;
        u32 no_of_sm;
        struct sm_info *sm_to_cluster;

#if defined(CONFIG_GK20A_CYCLE_STATS)
        struct nvgpu_mutex cs_lock;
        struct gk20a_cs_snapshot *cs_data;
#endif
        u32 max_css_buffer_size;
};

void gk20a_fecs_dump_falcon_stats(struct gk20a *g);

/* contexts associated with a TSG */
struct nvgpu_gr_ctx {
        struct nvgpu_mem mem;

        u32 graphics_preempt_mode;
        u32 compute_preempt_mode;

        struct nvgpu_mem preempt_ctxsw_buffer;
        struct nvgpu_mem spill_ctxsw_buffer;
        struct nvgpu_mem betacb_ctxsw_buffer;
        struct nvgpu_mem pagepool_ctxsw_buffer;
        u32 ctx_id;
        bool ctx_id_valid;
        bool cilp_preempt_pending;
        bool boosted_ctx;
        bool golden_img_loaded;

#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
        u64 virt_ctx;
#endif

        struct patch_desc patch_ctx;
        struct zcull_ctx_desc zcull_ctx;
        struct pm_ctx_desc pm_ctx;
        u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
        u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
        int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA];
        bool global_ctx_buffer_mapped;

        u32 tsgid;
};

struct gk20a_ctxsw_ucode_segment {
        u32 offset;
        u32 size;
};

struct gk20a_ctxsw_ucode_segments {
        u32 boot_entry;
        u32 boot_imem_offset;
        u32 boot_signature;
        struct gk20a_ctxsw_ucode_segment boot;
        struct gk20a_ctxsw_ucode_segment code;
        struct gk20a_ctxsw_ucode_segment data;
};

/* Checksums over the ucode files, computed as sums over their contents
 * viewed as sequences of u32; these are matched against the boot_signature
 * field in the structure above. */

/* T18X FECS remains the same as T21X, so
 * FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED is used for T18X. */
#define FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED 0x68edab34
#define FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE 0x9121ab5c
#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED 0x9125ab5c
#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED 0x8a621f78
#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED 0x67e5344b
#define FALCON_UCODE_SIG_T12X_FECS_OLDER 0x56da09f

#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED 0x3d3d65e2
#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED 0x303465d5
#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED 0x3fdd33d3
#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER 0x53d7877

#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED 0x93671b7d
#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2 0x4d6cbc10

#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED 0x393161da
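
/*
 * Illustrative sketch (an assumption, matching the checksum comment above):
 * a plain u32 sum over a ucode image, of the kind that would be compared
 * against boot_signature. The helper name is hypothetical.
 *
 *   static inline u32 ucode_checksum(const u32 *words, size_t n_words)
 *   {
 *           u32 sum = 0;
 *           size_t i;
 *
 *           for (i = 0; i < n_words; i++)
 *                   sum += words[i];
 *           return sum;
 *   }
 */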

struct gk20a_ctxsw_ucode_info {
        u64 *p_va;
        struct nvgpu_mem inst_blk_desc;
        struct nvgpu_mem surface_desc;
        struct gk20a_ctxsw_ucode_segments fecs;
        struct gk20a_ctxsw_ucode_segments gpccs;
};

struct gk20a_ctxsw_bootloader_desc {
        u32 start_offset;
        u32 size;
        u32 imem_offset;
        u32 entry_point;
};

struct fecs_method_op_gk20a {
        struct {
                u32 addr;
                u32 data;
        } method;

        struct {
                u32 id;
                u32 data;
                u32 clr;
                u32 *ret;
                u32 ok;
                u32 fail;
        } mailbox;

        struct {
                u32 ok;
                u32 fail;
        } cond;
};

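/*
 * Illustrative sketch (an assumption, not from this header): submitting a
 * FECS method and polling mailbox 0 for the "method finished" handshake via
 * gr_gk20a_submit_fecs_method_op(), declared later in this header. All
 * field values here are examples only.
 *
 *   struct fecs_method_op_gk20a op = {
 *           .method = { .addr = 0, .data = 0 },   // hypothetical method
 *           .mailbox = { .id = 0, .data = 0, .clr = ~0U, .ret = NULL,
 *                        .ok = eUcodeHandshakeMethodFinished, .fail = 0 },
 *           .cond = { .ok = GR_IS_UCODE_OP_EQUAL,
 *                     .fail = GR_IS_UCODE_OP_SKIP },
 *   };
 *   err = gr_gk20a_submit_fecs_method_op(g, op, true);
 */
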
struct nvgpu_warpstate {
        u64 valid_warps[2];
        u64 trapped_warps[2];
        u64 paused_warps[2];
};

struct gpu_ops;
int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
        struct channel_gk20a *c);
void gk20a_init_gr(struct gk20a *g);
int gk20a_init_gr_support(struct gk20a *g);
int gk20a_enable_gr_hw(struct gk20a *g);
int gk20a_gr_reset(struct gk20a *g);
void gk20a_gr_wait_initialized(struct gk20a *g);

int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a);

int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags);

int gk20a_gr_isr(struct gk20a *g);
u32 gk20a_gr_nonstall_isr(struct gk20a *g);

/* zcull */
u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr);
int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
        struct channel_gk20a *c, u64 zcull_va, u32 mode);
int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
        struct gr_zcull_info *zcull_params);
void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
        u32 *zcull_map_tiles);
/* zbc */
int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
        struct zbc_entry *zbc_val);
int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
        struct zbc_query_params *query_params);
int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
        struct zbc_entry *zbc_val);
int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr);

/* pmu */
int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size);
int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g,
        struct nvgpu_mem *inst_block);
int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va);

void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config);

/* sm */
bool gk20a_gr_sm_debugger_attached(struct gk20a *g);
u32 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g);

#define gr_gk20a_elpg_protected_call(g, func) \
        ({ \
                int err = 0; \
                if (g->support_pmu) { \
                        err = nvgpu_pg_elpg_disable(g); \
                        if (err != 0) { \
                                err = nvgpu_pg_elpg_enable(g); \
                        } \
                } \
                if (err == 0) { \
                        err = func; \
                        if (g->support_pmu) { \
                                (void)nvgpu_pg_elpg_enable(g); \
                        } \
                } \
                err; \
        })
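
/*
 * Illustrative usage (an assumption): wrapping a FECS query so ELPG is
 * disabled around the call and re-enabled afterwards, per the macro above.
 * "size" is a hypothetical local u32.
 *
 *   err = gr_gk20a_elpg_protected_call(g,
 *                   gr_gk20a_fecs_get_reglist_img_size(g, &size));
 */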

int gk20a_gr_suspend(struct gk20a *g);

struct nvgpu_dbg_reg_op;
int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
        struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
        u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
        bool *is_curr_ctx);
int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
        struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
        u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
        bool ch_is_curr_ctx);
int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
        u32 addr,
        u32 max_offsets,
        u32 *offsets, u32 *offset_addrs,
        u32 *num_offsets,
        bool is_quad, u32 quad);
int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
        u32 addr,
        u32 max_offsets,
        u32 *offsets, u32 *offset_addrs,
        u32 *num_offsets);
int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
        struct channel_gk20a *c,
        bool enable_smpc_ctxsw);
int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
        struct channel_gk20a *c,
        u64 gpu_va,
        u32 mode);

struct nvgpu_gr_ctx;
void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx,
        u32 addr, u32 data, bool patch);
int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
        struct nvgpu_gr_ctx *ch_ctx,
        bool update_patch_count);
void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
        struct nvgpu_gr_ctx *ch_ctx,
        bool update_patch_count);
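
/*
 * Illustrative sketch (an assumption based on the declarations above): the
 * expected begin/write/end pattern for updating patch-context entries.
 * "addr" and "data" are hypothetical register address/value pairs.
 *
 *   err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 *   if (err == 0) {
 *           gr_gk20a_ctx_patch_write(g, ch_ctx, addr, data, true);
 *           gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
 *   }
 */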
void gr_gk20a_commit_global_pagepool(struct gk20a *g,
        struct nvgpu_gr_ctx *ch_ctx,
        u64 addr, u32 size, bool patch);
void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
int gr_gk20a_init_fs_state(struct gk20a *g);
int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
int gr_gk20a_init_ctxsw_ucode(struct gk20a *g);
int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g);
void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
        struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
        struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);


void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c);
int gr_gk20a_disable_ctxsw(struct gk20a *g);
int gr_gk20a_enable_ctxsw(struct gk20a *g);
void gk20a_gr_resume_single_sm(struct gk20a *g,
        u32 gpc, u32 tpc, u32 sm);
void gk20a_gr_resume_all_sms(struct gk20a *g);
void gk20a_gr_suspend_single_sm(struct gk20a *g,
        u32 gpc, u32 tpc, u32 sm,
        u32 global_esr_mask, bool check_errors);
void gk20a_gr_suspend_all_sms(struct gk20a *g,
        u32 global_esr_mask, bool check_errors);
u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index);
int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
        struct channel_gk20a *ch, u64 sms, bool enable);
bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
        struct zbc_entry *color_val, u32 index);
int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
        struct zbc_entry *depth_val, u32 index);
int _gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
        struct zbc_entry *zbc_val);
void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries);
int gr_gk20a_wait_idle(struct gk20a *g, unsigned long duration_ms,
        u32 expect_delay);
int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
        bool *post_event, struct channel_gk20a *fault_ch,
        u32 *hww_global_esr);
int gr_gk20a_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
        bool *post_event);
int gr_gk20a_init_ctx_state(struct gk20a *g);
int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
        struct fecs_method_op_gk20a op,
        bool sleepduringwait);
int gr_gk20a_submit_fecs_method_op_locked(struct gk20a *g,
        struct fecs_method_op_gk20a op,
        bool sleepduringwait);
int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
        struct fecs_method_op_gk20a op);
int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
        struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
        u32 class, u32 padding);
void gr_gk20a_free_gr_ctx(struct gk20a *g,
        struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
int gr_gk20a_halt_pipe(struct gk20a *g);

#if defined(CONFIG_GK20A_CYCLE_STATS)
int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */
        u32 perfmon_id_count,   /* in - number of perfmons */
        u32 *perfmon_id_start,  /* out - index of first pm */
        /* in/out - pointer to client data used later */
        struct gk20a_cs_snapshot_client *css_client);

int gr_gk20a_css_detach(struct channel_gk20a *ch,
        struct gk20a_cs_snapshot_client *css_client);
int gr_gk20a_css_flush(struct channel_gk20a *ch,
        struct gk20a_cs_snapshot_client *css_client);

void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g);

#else
/* stub cleanup function when cyclestats snapshots are not enabled */
static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
{
        (void)g;
}
#endif

void gr_gk20a_fecs_host_int_enable(struct gk20a *g);
int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
        struct gr_gk20a_isr_data *isr_data);
int gk20a_gr_lock_down_sm(struct gk20a *g,
        u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
        bool check_errors);
int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
        u32 global_esr_mask, bool check_errors);
int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
        u32 *mailbox_ret, u32 opc_success,
        u32 mailbox_ok, u32 opc_fail,
        u32 mailbox_fail, bool sleepduringwait);
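
/*
 * Illustrative sketch (an assumption): waiting on FECS mailbox 0 for the
 * init-complete handshake, using the GR_IS_UCODE_OP_* comparisons and
 * eUcodeHandshake* values defined earlier in this header.
 *
 *   err = gr_gk20a_ctx_wait_ucode(g, 0, NULL,
 *                   GR_IS_UCODE_OP_EQUAL, eUcodeHandshakeInitComplete,
 *                   GR_IS_UCODE_OP_SKIP, 0, false);
 */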

int gr_gk20a_get_ctx_id(struct gk20a *g,
        struct channel_gk20a *c,
        u32 *ctx_id);

u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);

int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long duration_ms,
        u32 expect_delay);

struct dbg_session_gk20a;

bool gr_gk20a_suspend_context(struct channel_gk20a *ch);
bool gr_gk20a_resume_context(struct channel_gk20a *ch);
int gr_gk20a_suspend_contexts(struct gk20a *g,
        struct dbg_session_gk20a *dbg_s,
        int *ctx_resident_ch_fd);
int gr_gk20a_resume_contexts(struct gk20a *g,
        struct dbg_session_gk20a *dbg_s,
        int *ctx_resident_ch_fd);
void gk20a_gr_enable_gpc_exceptions(struct gk20a *g);
void gk20a_gr_enable_exceptions(struct gk20a *g);
int gr_gk20a_trigger_suspend(struct gk20a *g);
int gr_gk20a_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state);
int gr_gk20a_resume_from_pause(struct gk20a *g);
int gr_gk20a_clear_sm_errors(struct gk20a *g);
u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);

int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c);

int gr_gk20a_init_sm_id_table(struct gk20a *g);

int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);

void gr_gk20a_write_zcull_ptr(struct gk20a *g,
        struct nvgpu_mem *mem, u64 gpu_va);

void gr_gk20a_write_pm_ptr(struct gk20a *g,
        struct nvgpu_mem *mem, u64 gpu_va);

u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc);
u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc);
void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
        u32 *esr_sm_sel);
void gk20a_gr_init_ovr_sm_dsm_perf(void);
void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
        u32 **ovr_perf_regs);
void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
        struct nvgpu_mem *mem);
u32 gr_gk20a_get_patch_slots(struct gk20a *g);
int gk20a_gr_handle_notify_pending(struct gk20a *g,
        struct gr_gk20a_isr_data *isr_data);

int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
        struct channel_gk20a *c);
int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
        struct channel_gk20a *c, bool patch);

int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
        struct channel_gk20a *c);
u32 gk20a_init_sw_bundle(struct gk20a *g);
int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type);
int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
        struct gr_gk20a_isr_data *isr_data);
int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
        struct ctxsw_buf_offset_map_entry *map,
        struct aiv_list_gk20a *regs,
        u32 *count, u32 *offset,
        u32 max_cnt, u32 base,
        u32 num_fbpas, u32 stride, u32 mask);
int gr_gk20a_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map,
        struct aiv_list_gk20a *regs,
        u32 *count, u32 *offset,
        u32 max_cnt, u32 base, u32 mask);
int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
        enum ctxsw_addr_type *addr_type,
        u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
        u32 *broadcast_flags);
int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
        u32 gpc_num,
        u32 *priv_addr_table, u32 *t);
int gr_gk20a_create_priv_addr_table(struct gk20a *g,
        u32 addr,
        u32 *priv_addr_table,
        u32 *num_registers);
void gr_gk20a_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
        u32 num_fbpas,
        u32 *priv_addr_table, u32 *t);
int gr_gk20a_get_offset_in_gpccs_segment(struct gk20a *g,
        enum ctxsw_addr_type addr_type, u32 num_tpcs, u32 num_ppcs,
        u32 reg_list_ppc_count, u32 *__offset_in_segment);

void gk20a_gr_destroy_ctx_buffer(struct gk20a *g,
        struct gr_ctx_buffer_desc *desc);
int gk20a_gr_alloc_ctx_buffer(struct gk20a *g,
        struct gr_ctx_buffer_desc *desc, size_t size);
void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr);
int gr_gk20a_set_fecs_watchdog_timeout(struct gk20a *g);
#endif /* GR_GK20A_H */