/*
 * GK20A Graphics Engine
 *
 * Copyright (c) 2011-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef GR_GK20A_H
#define GR_GK20A_H

#include <nvgpu/types.h>

#include "gr_ctx_gk20a.h"
#include "mm_gk20a.h"
#include <nvgpu/power_features/pg.h>

#include <nvgpu/comptags.h>
#include <nvgpu/cond.h>

#define GR_IDLE_CHECK_DEFAULT		10 /* usec */
#define GR_IDLE_CHECK_MAX		200 /* usec */
#define GR_FECS_POLL_INTERVAL		5 /* usec */

#define INVALID_SCREEN_TILE_ROW_OFFSET	0xFFFFFFFF
#define INVALID_MAX_WAYS		0xFFFFFFFF

#define GK20A_FECS_UCODE_IMAGE	"fecs.bin"
#define GK20A_GPCCS_UCODE_IMAGE	"gpccs.bin"

#define GK20A_GR_MAX_PES_PER_GPC 3

#define GK20A_TIMEOUT_FPGA		100000 /* 100 sec */

/* Flags to be passed to g->ops.gr.alloc_obj_ctx() */
#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP		(1 << 1)
#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP		(1 << 2)

/*
 * Allocate a minimum of one page (4 KB) worth of patch space; this is
 * 512 entries of address/data pairs.
 */
#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY	2
#define PATCH_CTX_SLOTS_PER_PAGE \
	(PAGE_SIZE/(PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * sizeof(u32)))
#define PATCH_CTX_ENTRIES_FROM_SIZE(size) ((size)/sizeof(u32))
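
/*
 * A worked example of the arithmetic above (a sketch assuming the common
 * 4 KB PAGE_SIZE; other page sizes scale proportionally):
 *
 *	PATCH_CTX_SLOTS_PER_PAGE          = 4096 / (2 * sizeof(u32))
 *	                                  = 4096 / 8 = 512 address/data entries
 *	PATCH_CTX_ENTRIES_FROM_SIZE(4096) = 4096 / sizeof(u32)
 *	                                  = 1024 u32 slots
 */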

#define NVGPU_PREEMPTION_MODE_GRAPHICS_WFI	(1 << 0)
#define NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP	(1 << 1)

#define NVGPU_PREEMPTION_MODE_COMPUTE_WFI	(1 << 0)
#define NVGPU_PREEMPTION_MODE_COMPUTE_CTA	(1 << 1)
#define NVGPU_PREEMPTION_MODE_COMPUTE_CILP	(1 << 2)

#define CTXSW_INTR0				BIT32(0)
#define CTXSW_INTR1				BIT32(1)

#define MAILBOX_VALUE_TIMESTAMP_BUFFER_FULL	0x26

struct tsg_gk20a;
struct channel_gk20a;
struct nvgpu_warpstate;

enum ctxsw_addr_type;

enum /* global_ctx_buffer */ {
	CIRCULAR		= 0,
	PAGEPOOL		= 1,
	ATTRIBUTE		= 2,
	CIRCULAR_VPR		= 3,
	PAGEPOOL_VPR		= 4,
	ATTRIBUTE_VPR		= 5,
	GOLDEN_CTX		= 6,
	PRIV_ACCESS_MAP		= 7,
	/* #8 is reserved */
	FECS_TRACE_BUFFER	= 9,
	NR_GLOBAL_CTX_BUF	= 10
};

/* either ATTRIBUTE or ATTRIBUTE_VPR maps to ATTRIBUTE_VA */
enum /* global_ctx_buffer_va */ {
	CIRCULAR_VA		= 0,
	PAGEPOOL_VA		= 1,
	ATTRIBUTE_VA		= 2,
	GOLDEN_CTX_VA		= 3,
	PRIV_ACCESS_MAP_VA	= 4,
	/* #5 is reserved */
	FECS_TRACE_BUFFER_VA	= 6,
	NR_GLOBAL_CTX_BUF_VA	= 7
};

enum {
	WAIT_UCODE_LOOP,
	WAIT_UCODE_TIMEOUT,
	WAIT_UCODE_ERROR,
	WAIT_UCODE_OK
};

enum {
	GR_IS_UCODE_OP_EQUAL,
	GR_IS_UCODE_OP_NOT_EQUAL,
	GR_IS_UCODE_OP_AND,
	GR_IS_UCODE_OP_LESSER,
	GR_IS_UCODE_OP_LESSER_EQUAL,
	GR_IS_UCODE_OP_SKIP
};

enum {
	eUcodeHandshakeInitComplete = 1,
	eUcodeHandshakeMethodFinished
};

enum {
	ELCG_MODE = (1 << 0),
	BLCG_MODE = (1 << 1),
	INVALID_MODE = (1 << 2)
};

enum {
	NVGPU_EVENT_ID_BPT_INT = 0,
	NVGPU_EVENT_ID_BPT_PAUSE,
	NVGPU_EVENT_ID_BLOCKING_SYNC,
	NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED,
	NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE,
	NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN,
	NVGPU_EVENT_ID_MAX,
};

#ifndef GR_GO_IDLE_BUNDLE
#define GR_GO_IDLE_BUNDLE	0x0000e100 /* --V-B */
#endif

struct gr_channel_map_tlb_entry {
	u32 curr_ctx;
	u32 chid;
	u32 tsgid;
};

struct gr_zcull_gk20a {
	u32 aliquot_width;
	u32 aliquot_height;
	u32 aliquot_size;
	u32 total_aliquots;

	u32 width_align_pixels;
	u32 height_align_pixels;
	u32 pixel_squares_by_aliquots;
};

struct gr_zcull_info {
	u32 width_align_pixels;
	u32 height_align_pixels;
	u32 pixel_squares_by_aliquots;
	u32 aliquot_total;
	u32 region_byte_multiplier;
	u32 region_header_size;
	u32 subregion_header_size;
	u32 subregion_width_align_pixels;
	u32 subregion_height_align_pixels;
	u32 subregion_count;
};

#define GK20A_ZBC_COLOR_VALUE_SIZE	4  /* RGBA */

#define GK20A_STARTOF_ZBC_TABLE		1U   /* index zero reserved to indicate "not ZBCd" */
#define GK20A_SIZEOF_ZBC_TABLE		16  /* match ltcs_ltss_dstg_zbc_index_address width (4) */
#define GK20A_ZBC_TABLE_SIZE		(16 - 1)

#define GK20A_ZBC_TYPE_INVALID		0
#define GK20A_ZBC_TYPE_COLOR		1
#define GK20A_ZBC_TYPE_DEPTH		2
#define T19X_ZBC			3

struct zbc_color_table {
	u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 format;
	u32 ref_cnt;
};

struct zbc_depth_table {
	u32 depth;
	u32 format;
	u32 ref_cnt;
};

struct zbc_s_table {
	u32 stencil;
	u32 format;
	u32 ref_cnt;
};

struct zbc_entry {
	u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 depth;
	u32 type;	/* color or depth */
	u32 format;
};
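
/*
 * A minimal usage sketch for struct zbc_entry (illustrative only: "fmt" and
 * "clear_value" are hypothetical): fill in a color entry and hand it to
 * gr_gk20a_add_zbc(), declared later in this header.
 *
 *	struct zbc_entry entry;
 *	u32 i;
 *	int err;
 *
 *	(void) memset(&entry, 0, sizeof(entry));
 *	entry.type = GK20A_ZBC_TYPE_COLOR;
 *	entry.format = fmt;
 *	for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
 *		entry.color_ds[i] = clear_value[i];
 *		entry.color_l2[i] = clear_value[i];
 *	}
 *	err = gr_gk20a_add_zbc(g, gr, &entry);
 */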

struct zbc_query_params {
	u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 depth;
	u32 ref_cnt;
	u32 format;
	u32 type;	/* color or depth */
	u32 index_size;	/* [out] size, [in] index */
};

struct sm_info {
	u32 gpc_index;
	u32 tpc_index;
	u32 sm_index;
	u32 global_tpc_index;
};

#if defined(CONFIG_GK20A_CYCLE_STATS)
struct gk20a_cs_snapshot_client;
struct gk20a_cs_snapshot;
#endif

struct gr_gk20a_isr_data {
	u32 addr;
	u32 data_lo;
	u32 data_hi;
	u32 curr_ctx;
	struct channel_gk20a *ch;
	u32 offset;
	u32 sub_chan;
	u32 class_num;
};

struct gr_ctx_buffer_desc {
	void (*destroy)(struct gk20a *, struct gr_ctx_buffer_desc *);
	struct nvgpu_mem mem;
	void *priv;
};

struct nvgpu_preemption_modes_rec {
	u32 graphics_preemption_mode_flags; /* supported preemption modes */
	u32 compute_preemption_mode_flags; /* supported preemption modes */

	u32 default_graphics_preempt_mode; /* default mode */
	u32 default_compute_preempt_mode; /* default mode */
};

struct gr_gk20a {
	struct gk20a *g;
	struct {
		bool dynamic;

		u32 buffer_size;
		u32 buffer_total_size;

		bool golden_image_initialized;
		u32 golden_image_size;
		u32 *local_golden_image;

		u32 hwpm_ctxsw_buffer_offset_map_count;
		struct ctxsw_buf_offset_map_entry *hwpm_ctxsw_buffer_offset_map;

		u32 zcull_ctxsw_image_size;

		u32 pm_ctxsw_image_size;

		u32 buffer_header_size;

		u32 priv_access_map_size;

		u32 fecs_trace_buffer_size;

		struct gr_ucode_gk20a ucode;

		struct av_list_gk20a  sw_bundle_init;
		struct av_list_gk20a  sw_method_init;
		struct aiv_list_gk20a sw_ctx_load;
		struct av_list_gk20a  sw_non_ctx_load;
		struct av_list_gk20a  sw_veid_bundle_init;
		struct av64_list_gk20a sw_bundle64_init;
		struct {
			struct aiv_list_gk20a sys;
			struct aiv_list_gk20a gpc;
			struct aiv_list_gk20a tpc;
			struct aiv_list_gk20a zcull_gpc;
			struct aiv_list_gk20a ppc;
			struct aiv_list_gk20a pm_sys;
			struct aiv_list_gk20a pm_gpc;
			struct aiv_list_gk20a pm_tpc;
			struct aiv_list_gk20a pm_ppc;
			struct aiv_list_gk20a perf_sys;
			struct aiv_list_gk20a perf_gpc;
			struct aiv_list_gk20a fbp;
			struct aiv_list_gk20a fbp_router;
			struct aiv_list_gk20a gpc_router;
			struct aiv_list_gk20a pm_ltc;
			struct aiv_list_gk20a pm_fbpa;
			struct aiv_list_gk20a perf_sys_router;
			struct aiv_list_gk20a perf_pma;
			struct aiv_list_gk20a pm_rop;
			struct aiv_list_gk20a pm_ucgpc;
			struct aiv_list_gk20a etpc;
			struct aiv_list_gk20a pm_cau;
		} ctxsw_regs;
		u32 regs_base_index;
		bool valid;

		u32 preempt_image_size;
		bool force_preemption_gfxp;
		bool force_preemption_cilp;
		bool dump_ctxsw_stats_on_channel_close;
	} ctx_vars;

	struct nvgpu_mutex ctx_mutex; /* protect golden ctx init */
	struct nvgpu_mutex fecs_mutex; /* protect fecs method */

#define GR_NETLIST_DYNAMIC	-1
#define GR_NETLIST_STATIC_A	'A'
	int netlist;

	struct nvgpu_cond init_wq;
	int initialized;

	u32 num_fbps;

	u32 max_comptag_lines;
	u32 compbit_backing_size;
	u32 comptags_per_cacheline;
	u32 slices_per_ltc;
	u32 cacheline_size;
	u32 gobs_per_comptagline_per_slice;

	u32 max_gpc_count;
	u32 max_fbps_count;
	u32 max_tpc_per_gpc_count;
	u32 max_zcull_per_gpc_count;
	u32 max_tpc_count;

	u32 sys_count;
	u32 gpc_count;
	u32 pe_count_per_gpc;
	u32 ppc_count;
	u32 *gpc_ppc_count;
	u32 tpc_count;
	u32 *gpc_tpc_count;
	u32 *gpc_tpc_mask;
	u32 zcb_count;
	u32 *gpc_zcb_count;
	u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
	u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
	u32 *gpc_skip_mask;

	u32 bundle_cb_default_size;
	u32 min_gpm_fifo_depth;
	u32 bundle_cb_token_limit;
	u32 attrib_cb_default_size;
	u32 attrib_cb_size;
	u32 attrib_cb_gfxp_default_size;
	u32 attrib_cb_gfxp_size;
	u32 alpha_cb_default_size;
	u32 alpha_cb_size;
	u32 timeslice_mode;
	u32 czf_bypass;
	u32 pd_max_batches;
	u32 gfxp_wfi_timeout_count;
	u32 gfxp_wfi_timeout_unit;

	/*
	 * The memory size (in MB) to deduct from max_comptag_mem,
	 * usually close to the amount of memory the running system is using.
	 */
	u32 comptag_mem_deduct;

	struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];

	u8 *map_tiles;
	u32 map_tile_count;
	u32 map_row_offset;

	u32 max_comptag_mem; /* max memory size (MB) for comptag */
	struct compbit_store_desc compbit_store;
	struct gk20a_comptag_allocator comp_tags;

	struct gr_zcull_gk20a zcull;

	struct nvgpu_mutex zbc_lock;
	struct zbc_color_table zbc_col_tbl[GK20A_ZBC_TABLE_SIZE];
	struct zbc_depth_table zbc_dep_tbl[GK20A_ZBC_TABLE_SIZE];
	struct zbc_s_table zbc_s_tbl[GK20A_ZBC_TABLE_SIZE];
	s32 max_default_color_index;
	s32 max_default_depth_index;
	s32 max_default_s_index;

	u32 max_used_color_index;
	u32 max_used_depth_index;
	u32 max_used_s_index;

#define GR_CHANNEL_MAP_TLB_SIZE		2 /* must be a power of 2 */
	struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
	u32 channel_tlb_flush_index;
	struct nvgpu_spinlock ch_tlb_lock;

	void (*remove_support)(struct gr_gk20a *gr);
	bool sw_ready;
	bool skip_ucode_init;

	struct nvgpu_preemption_modes_rec preemption_mode_rec;

	u32 fecs_feature_override_ecc_val;

	int cilp_preempt_pending_chid;

	u32 fbp_en_mask;
	u32 *fbp_rop_l2_en_mask;
	u32 no_of_sm;
	struct sm_info *sm_to_cluster;

#if defined(CONFIG_GK20A_CYCLE_STATS)
	struct nvgpu_mutex			cs_lock;
	struct gk20a_cs_snapshot	*cs_data;
#endif
	u32 max_css_buffer_size;
};

void gk20a_fecs_dump_falcon_stats(struct gk20a *g);

/* contexts associated with a TSG */
struct nvgpu_gr_ctx {
	struct nvgpu_mem mem;

	u32 graphics_preempt_mode;
	u32 compute_preempt_mode;

	struct nvgpu_mem preempt_ctxsw_buffer;
	struct nvgpu_mem spill_ctxsw_buffer;
	struct nvgpu_mem betacb_ctxsw_buffer;
	struct nvgpu_mem pagepool_ctxsw_buffer;
	u32 ctx_id;
	bool ctx_id_valid;
	bool cilp_preempt_pending;
	bool boosted_ctx;
	bool golden_img_loaded;

#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
	u64 virt_ctx;
#endif

	struct patch_desc	patch_ctx;
	struct zcull_ctx_desc	zcull_ctx;
	struct pm_ctx_desc	pm_ctx;
	u64	global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
	u64	global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
	int	global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA];
	bool	global_ctx_buffer_mapped;

	u32 tsgid;
};

struct gk20a_ctxsw_ucode_segment {
	u32 offset;
	u32 size;
};

struct gk20a_ctxsw_ucode_segments {
	u32 boot_entry;
	u32 boot_imem_offset;
	u32 boot_signature;
	struct gk20a_ctxsw_ucode_segment boot;
	struct gk20a_ctxsw_ucode_segment code;
	struct gk20a_ctxsw_ucode_segment data;
};

/*
 * Sums over the ucode files as sequences of u32, compared against the
 * boot_signature field in the structure above.
 */

/*
 * The T18X FECS ucode remains the same as T21X, so
 * FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED is also used for T18X.
 */
#define FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED	0x68edab34
#define FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE	0x9121ab5c
#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED	0x9125ab5c
#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED	0x8a621f78
#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED	0x67e5344b
#define FALCON_UCODE_SIG_T12X_FECS_OLDER		0x56da09f

#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED	0x3d3d65e2
#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED	0x303465d5
#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED	0x3fdd33d3
#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER		0x53d7877

#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED	0x93671b7d
#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2	0x4d6cbc10

#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED	0x393161da
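
/*
 * A minimal sketch of the sum described above (an assumption based on the
 * comment, not a copy of the actual ucode tooling): checksum a ucode image
 * as a sequence of u32 words so the result can be compared against the
 * boot_signature field. gk20a_ucode_word_sum() is a hypothetical helper,
 * not part of the driver.
 */
static inline u32 gk20a_ucode_word_sum(const u32 *words, u32 count)
{
	u32 sum = 0U;
	u32 i;

	for (i = 0U; i < count; i++) {
		sum += words[i];
	}
	return sum;
}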

struct gk20a_ctxsw_ucode_info {
	u64 *p_va;
	struct nvgpu_mem inst_blk_desc;
	struct nvgpu_mem surface_desc;
	struct gk20a_ctxsw_ucode_segments fecs;
	struct gk20a_ctxsw_ucode_segments gpccs;
};

struct gk20a_ctxsw_bootloader_desc {
	u32 start_offset;
	u32 size;
	u32 imem_offset;
	u32 entry_point;
};

struct fecs_method_op_gk20a {
	struct {
		u32 addr;
		u32 data;
	} method;

	struct {
		u32 id;
		u32 data;
		u32 clr;
		u32 *ret;
		u32 ok;
		u32 fail;
	} mailbox;

	struct {
		u32 ok;
		u32 fail;
	} cond;

};
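
/*
 * A usage sketch for struct fecs_method_op_gk20a (values are illustrative;
 * "method_addr" is a hypothetical method id): clear mailbox 0, push one
 * FECS method, and treat any non-zero mailbox readback as success, then
 * submit via gr_gk20a_submit_fecs_method_op(), declared later in this
 * header.
 *
 *	struct fecs_method_op_gk20a op = {
 *		.method = { .addr = method_addr, .data = 0 },
 *		.mailbox = { .id = 0, .data = 0, .clr = ~0U,
 *			     .ret = NULL, .ok = 0, .fail = 0 },
 *		.cond = { .ok = GR_IS_UCODE_OP_NOT_EQUAL,
 *			  .fail = GR_IS_UCODE_OP_SKIP },
 *	};
 *
 *	err = gr_gk20a_submit_fecs_method_op(g, op, false);
 */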

struct nvgpu_warpstate {
	u64 valid_warps[2];
	u64 trapped_warps[2];
	u64 paused_warps[2];
};

struct gpu_ops;
int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
					struct channel_gk20a *c);
void gk20a_init_gr(struct gk20a *g);
int gk20a_init_gr_support(struct gk20a *g);
int gk20a_enable_gr_hw(struct gk20a *g);
int gk20a_gr_reset(struct gk20a *g);
void gk20a_gr_wait_initialized(struct gk20a *g);

int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a);

int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags);
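
/*
 * A usage sketch for gk20a_alloc_obj_ctx() (class_num is hypothetical):
 * allocate an object context on channel c, requesting graphics preemption
 * (GfxP) support via the NVGPU_OBJ_CTX_FLAGS_* flags defined above.
 *
 *	err = gk20a_alloc_obj_ctx(c, class_num,
 *				  NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP);
 */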

int gk20a_gr_isr(struct gk20a *g);
u32 gk20a_gr_nonstall_isr(struct gk20a *g);

/* zcull */
u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr);
int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
			struct channel_gk20a *c, u64 zcull_va, u32 mode);
int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
			struct gr_zcull_info *zcull_params);
void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
					u32 *zcull_map_tiles);
/* zbc */
int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
			struct zbc_entry *zbc_val);
int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
			struct zbc_query_params *query_params);
int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
			struct zbc_entry *zbc_val);
int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr);

/* pmu */
int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size);
int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g,
		struct nvgpu_mem *inst_block);
int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va);

void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config);

/* sm */
bool gk20a_gr_sm_debugger_attached(struct gk20a *g);
u32 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g);

#define gr_gk20a_elpg_protected_call(g, func) \
	({ \
		int err = 0; \
		if (g->support_pmu) { \
			err = nvgpu_pg_elpg_disable(g); \
			if (err != 0) { \
				(void)nvgpu_pg_elpg_enable(g); \
			} \
		} \
		if (err == 0) { \
			err = func; \
			if (g->support_pmu) { \
				(void)nvgpu_pg_elpg_enable(g); \
			} \
		} \
		err; \
	})
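
/*
 * A usage sketch for gr_gk20a_elpg_protected_call(): the macro disables
 * ELPG around the expression (when a PMU is present), re-enables it
 * afterwards, and evaluates to the expression's return value. For example,
 * wrapping a call declared later in this header:
 *
 *	err = gr_gk20a_elpg_protected_call(g,
 *			gr_gk20a_init_fs_state(g));
 */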

int gk20a_gr_suspend(struct gk20a *g);

struct nvgpu_dbg_reg_op;
int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
			  struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
			  bool *is_curr_ctx);
int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
			    struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
			    u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
			    bool ch_is_curr_ctx);
int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
				    u32 addr,
				    u32 max_offsets,
				    u32 *offsets, u32 *offset_addrs,
				    u32 *num_offsets,
				    bool is_quad, u32 quad);
int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
				       u32 addr,
				       u32 max_offsets,
				       u32 *offsets, u32 *offset_addrs,
				       u32 *num_offsets);
int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
				    struct channel_gk20a *c,
				    bool enable_smpc_ctxsw);
int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
				  struct channel_gk20a *c,
				  u64 gpu_va,
				  u32 mode);

struct nvgpu_gr_ctx;
void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx,
				    u32 addr, u32 data, bool patch);
int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
					  struct nvgpu_gr_ctx *ch_ctx,
					  bool update_patch_count);
void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
					struct nvgpu_gr_ctx *ch_ctx,
					bool update_patch_count);
void gr_gk20a_commit_global_pagepool(struct gk20a *g,
				     struct nvgpu_gr_ctx *ch_ctx,
				     u64 addr, u32 size, bool patch);
void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
int gr_gk20a_init_fs_state(struct gk20a *g);
int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
int gr_gk20a_init_ctxsw_ucode(struct gk20a *g);
int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g);
void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);

void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c);
int gr_gk20a_disable_ctxsw(struct gk20a *g);
int gr_gk20a_enable_ctxsw(struct gk20a *g);
void gk20a_gr_resume_single_sm(struct gk20a *g,
		u32 gpc, u32 tpc, u32 sm);
void gk20a_gr_resume_all_sms(struct gk20a *g);
void gk20a_gr_suspend_single_sm(struct gk20a *g,
		u32 gpc, u32 tpc, u32 sm,
		u32 global_esr_mask, bool check_errors);
void gk20a_gr_suspend_all_sms(struct gk20a *g,
		u32 global_esr_mask, bool check_errors);
u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index);
int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
	struct channel_gk20a *ch, u64 sms, bool enable);
bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
			   struct zbc_entry *color_val, u32 index);
int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
			   struct zbc_entry *depth_val, u32 index);
int _gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
			struct zbc_entry *zbc_val);
void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries);
int gr_gk20a_wait_idle(struct gk20a *g, unsigned long duration_ms,
		       u32 expect_delay);
int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
		bool *post_event, struct channel_gk20a *fault_ch,
		u32 *hww_global_esr);
int gr_gk20a_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
					bool *post_event);
int gr_gk20a_init_ctx_state(struct gk20a *g);
int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
				   struct fecs_method_op_gk20a op,
				   bool sleepduringwait);
int gr_gk20a_submit_fecs_method_op_locked(struct gk20a *g,
				   struct fecs_method_op_gk20a op,
				   bool sleepduringwait);
int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
		struct fecs_method_op_gk20a op);
int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
			  struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
			  u32 class, u32 padding);
void gr_gk20a_free_gr_ctx(struct gk20a *g,
		       struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
int gr_gk20a_halt_pipe(struct gk20a *g);

#if defined(CONFIG_GK20A_CYCLE_STATS)
int gr_gk20a_css_attach(struct channel_gk20a *ch,   /* in - main hw structure */
			u32 perfmon_id_count,	    /* in - number of perfmons */
			u32 *perfmon_id_start,	    /* out - index of first pm */
			/* in/out - pointer to client data used in later calls */
			struct gk20a_cs_snapshot_client *css_client);

int gr_gk20a_css_detach(struct channel_gk20a *ch,
				struct gk20a_cs_snapshot_client *css_client);
int gr_gk20a_css_flush(struct channel_gk20a *ch,
				struct gk20a_cs_snapshot_client *css_client);

void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g);

#else
/* empty stub cleanup function when cyclestats snapshots are disabled */
static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
{
	(void)g;
}
#endif

void gr_gk20a_fecs_host_int_enable(struct gk20a *g);
int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
		struct gr_gk20a_isr_data *isr_data);
int gk20a_gr_lock_down_sm(struct gk20a *g,
			 u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
			 bool check_errors);
int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
		u32 global_esr_mask, bool check_errors);
int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
			    u32 *mailbox_ret, u32 opc_success,
			    u32 mailbox_ok, u32 opc_fail,
			    u32 mailbox_fail, bool sleepduringwait);

int gr_gk20a_get_ctx_id(struct gk20a *g,
		struct channel_gk20a *c,
		u32 *ctx_id);

u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);

int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long duration_ms,
			  u32 expect_delay);

struct dbg_session_gk20a;

bool gr_gk20a_suspend_context(struct channel_gk20a *ch);
bool gr_gk20a_resume_context(struct channel_gk20a *ch);
int gr_gk20a_suspend_contexts(struct gk20a *g,
			      struct dbg_session_gk20a *dbg_s,
			      int *ctx_resident_ch_fd);
int gr_gk20a_resume_contexts(struct gk20a *g,
			      struct dbg_session_gk20a *dbg_s,
			      int *ctx_resident_ch_fd);
void gk20a_gr_enable_gpc_exceptions(struct gk20a *g);
void gk20a_gr_enable_exceptions(struct gk20a *g);
int gr_gk20a_trigger_suspend(struct gk20a *g);
int gr_gk20a_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state);
int gr_gk20a_resume_from_pause(struct gk20a *g);
int gr_gk20a_clear_sm_errors(struct gk20a *g);
u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);

int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c);

int gr_gk20a_init_sm_id_table(struct gk20a *g);

int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);

void gr_gk20a_write_zcull_ptr(struct gk20a *g,
				struct nvgpu_mem *mem, u64 gpu_va);

void gr_gk20a_write_pm_ptr(struct gk20a *g,
				struct nvgpu_mem *mem, u64 gpu_va);

u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc);
u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc);
void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
				u32 *esr_sm_sel);
void gk20a_gr_init_ovr_sm_dsm_perf(void);
void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
					       u32 **ovr_perf_regs);
void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
					struct nvgpu_mem *mem);
u32 gr_gk20a_get_patch_slots(struct gk20a *g);
int gk20a_gr_handle_notify_pending(struct gk20a *g,
				struct gr_gk20a_isr_data *isr_data);

int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
				struct channel_gk20a *c);
int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
			struct channel_gk20a *c, bool patch);

int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
					struct channel_gk20a *c);
u32 gk20a_init_sw_bundle(struct gk20a *g);
int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type);
int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
				struct gr_gk20a_isr_data *isr_data);
int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
				struct ctxsw_buf_offset_map_entry *map,
				struct aiv_list_gk20a *regs,
				u32 *count, u32 *offset,
				u32 max_cnt, u32 base,
				u32 num_fbpas, u32 stride, u32 mask);
int gr_gk20a_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map,
	struct aiv_list_gk20a *regs,
	u32 *count, u32 *offset,
	u32 max_cnt, u32 base, u32 mask);
int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
	enum ctxsw_addr_type *addr_type,
	u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
	u32 *broadcast_flags);
int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
	u32 gpc_num,
	u32 *priv_addr_table, u32 *t);
int gr_gk20a_create_priv_addr_table(struct gk20a *g,
	u32 addr,
	u32 *priv_addr_table,
	u32 *num_registers);
void gr_gk20a_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
	u32 num_fbpas,
	u32 *priv_addr_table, u32 *t);
int gr_gk20a_get_offset_in_gpccs_segment(struct gk20a *g,
	enum ctxsw_addr_type addr_type, u32 num_tpcs, u32 num_ppcs,
	u32 reg_list_ppc_count, u32 *__offset_in_segment);

void gk20a_gr_destroy_ctx_buffer(struct gk20a *g,
	struct gr_ctx_buffer_desc *desc);
int gk20a_gr_alloc_ctx_buffer(struct gk20a *g,
	struct gr_ctx_buffer_desc *desc, size_t size);
void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr);
int gr_gk20a_set_fecs_watchdog_timeout(struct gk20a *g);
#endif /* GR_GK20A_H */