Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.h')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 779
1 file changed, 779 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
new file mode 100644
index 00000000..3377df5d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -0,0 +1,779 @@
/*
 * GK20A Graphics Engine
 *
 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef GR_GK20A_H
#define GR_GK20A_H

#include <uapi/linux/nvgpu.h>
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
#include "gr_t18x.h"
#endif
#ifdef CONFIG_TEGRA_19x_GPU
#include "gr_t19x.h"
#endif

#include "tsg_gk20a.h"
#include "gr_ctx_gk20a.h"
#include "mm_gk20a.h"

#include <nvgpu/comptags.h>

#define GR_IDLE_CHECK_DEFAULT	10 /* usec */
#define GR_IDLE_CHECK_MAX	200 /* usec */
#define GR_FECS_POLL_INTERVAL	5 /* usec */
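
/*
 * Illustrative sketch (not part of this header): callers such as
 * gr_gk20a_wait_idle() typically poll with a delay that starts at
 * GR_IDLE_CHECK_DEFAULT and backs off exponentially, capped at
 * GR_IDLE_CHECK_MAX, roughly:
 *
 *	u32 delay = GR_IDLE_CHECK_DEFAULT;
 *	do {
 *		if (engine_is_idle(g))
 *			return 0;
 *		nvgpu_usleep_range(delay, delay * 2);
 *		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
 *	} while (!nvgpu_timeout_expired(&timeout));
 *
 * Here engine_is_idle() and the timeout setup are stand-ins for the
 * real register checks done in the .c file.
 */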

#define INVALID_SCREEN_TILE_ROW_OFFSET	0xFFFFFFFF
#define INVALID_MAX_WAYS		0xFFFFFFFF

#define GK20A_FECS_UCODE_IMAGE	"fecs.bin"
#define GK20A_GPCCS_UCODE_IMAGE	"gpccs.bin"

#define GK20A_GR_MAX_PES_PER_GPC 3

#define GK20A_TIMEOUT_FPGA	100000 /* 100 sec */

/*
 * Allocate a minimum of one page (4KB) of patch space; this holds 512
 * entries of address/data pairs.
 */
#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY	2
#define PATCH_CTX_SLOTS_PER_PAGE \
	(PAGE_SIZE/(PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * sizeof(u32)))
#define PATCH_CTX_ENTRIES_FROM_SIZE(size) ((size)/sizeof(u32))
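
/*
 * Worked example: on a 4KB-page system, PATCH_CTX_SLOTS_PER_PAGE
 * evaluates to 4096 / (2 * sizeof(u32)) = 4096 / 8 = 512, i.e. one page
 * holds 512 address/data patch entries, matching the comment above.
 */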

#define NVGPU_PREEMPTION_MODE_GRAPHICS_WFI	(1 << 0)
#define NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP	(1 << 1)

#define NVGPU_PREEMPTION_MODE_COMPUTE_WFI	(1 << 0)
#define NVGPU_PREEMPTION_MODE_COMPUTE_CTA	(1 << 1)
#define NVGPU_PREEMPTION_MODE_COMPUTE_CILP	(1 << 2)

struct channel_gk20a;
struct nvgpu_warpstate;

enum /* global_ctx_buffer */ {
	CIRCULAR = 0,
	PAGEPOOL = 1,
	ATTRIBUTE = 2,
	CIRCULAR_VPR = 3,
	PAGEPOOL_VPR = 4,
	ATTRIBUTE_VPR = 5,
	GOLDEN_CTX = 6,
	PRIV_ACCESS_MAP = 7,
	NR_GLOBAL_CTX_BUF = 8
};

/* either ATTRIBUTE or ATTRIBUTE_VPR maps to ATTRIBUTE_VA */
enum /* global_ctx_buffer_va */ {
	CIRCULAR_VA = 0,
	PAGEPOOL_VA = 1,
	ATTRIBUTE_VA = 2,
	GOLDEN_CTX_VA = 3,
	PRIV_ACCESS_MAP_VA = 4,
	NR_GLOBAL_CTX_BUF_VA = 5
};
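
/*
 * Illustrative sketch (not part of this header): per the comment above,
 * mapping a global ctx buffer index to its VA slot collapses the VPR
 * variants, e.g.:
 *
 *	static inline int global_ctx_buffer_va_index(int buf)
 *	{
 *		switch (buf) {
 *		case CIRCULAR:
 *		case CIRCULAR_VPR:	return CIRCULAR_VA;
 *		case PAGEPOOL:
 *		case PAGEPOOL_VPR:	return PAGEPOOL_VA;
 *		case ATTRIBUTE:
 *		case ATTRIBUTE_VPR:	return ATTRIBUTE_VA;
 *		case GOLDEN_CTX:	return GOLDEN_CTX_VA;
 *		case PRIV_ACCESS_MAP:	return PRIV_ACCESS_MAP_VA;
 *		default:		return -1;
 *		}
 *	}
 *
 * global_ctx_buffer_va_index() is a hypothetical helper; the driver
 * performs this mapping inline where the buffers are mapped.
 */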

enum {
	WAIT_UCODE_LOOP,
	WAIT_UCODE_TIMEOUT,
	WAIT_UCODE_ERROR,
	WAIT_UCODE_OK
};

enum {
	GR_IS_UCODE_OP_EQUAL,
	GR_IS_UCODE_OP_NOT_EQUAL,
	GR_IS_UCODE_OP_AND,
	GR_IS_UCODE_OP_LESSER,
	GR_IS_UCODE_OP_LESSER_EQUAL,
	GR_IS_UCODE_OP_SKIP
};
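
/*
 * Illustrative sketch (not part of this header): the wait-for-ucode
 * helper compares a FECS mailbox value against an expected value using
 * one of the GR_IS_UCODE_OP_* codes, along these lines:
 *
 *	static bool check_op(u32 op, u32 mailbox, u32 expect)
 *	{
 *		switch (op) {
 *		case GR_IS_UCODE_OP_EQUAL:		return mailbox == expect;
 *		case GR_IS_UCODE_OP_NOT_EQUAL:		return mailbox != expect;
 *		case GR_IS_UCODE_OP_AND:		return (mailbox & expect) != 0;
 *		case GR_IS_UCODE_OP_LESSER:		return mailbox < expect;
 *		case GR_IS_UCODE_OP_LESSER_EQUAL:	return mailbox <= expect;
 *		case GR_IS_UCODE_OP_SKIP:		return true;
 *		default:				return false;
 *		}
 *	}
 *
 * check_op() is a hypothetical condensation of the logic in
 * gr_gk20a_ctx_wait_ucode(), declared near the end of this header.
 */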

enum {
	eUcodeHandshakeInitComplete = 1,
	eUcodeHandshakeMethodFinished
};

enum {
	ELCG_MODE = (1 << 0),
	BLCG_MODE = (1 << 1),
	INVALID_MODE = (1 << 2)
};

enum {
	ELCG_RUN,	/* clk always run, i.e. disable elcg */
	ELCG_STOP,	/* clk is stopped */
	ELCG_AUTO	/* clk will run when non-idle, standard elcg mode */
};

enum {
	BLCG_RUN,	/* clk always run, i.e. disable blcg */
	BLCG_AUTO	/* clk will run when non-idle, standard blcg mode */
};

enum {
	NVGPU_EVENT_ID_BPT_INT = 0,
	NVGPU_EVENT_ID_BPT_PAUSE,
	NVGPU_EVENT_ID_BLOCKING_SYNC,
	NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED,
	NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE,
	NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN,
	NVGPU_EVENT_ID_MAX,
};

#ifndef GR_GO_IDLE_BUNDLE
#define GR_GO_IDLE_BUNDLE	0x0000e100 /* --V-B */
#endif

struct gr_channel_map_tlb_entry {
	u32 curr_ctx;
	u32 chid;
	u32 tsgid;
};
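
/*
 * Illustrative sketch (not part of this header): the ISR path resolves
 * a context pointer to a channel ID through this small TLB before
 * falling back to a full channel scan, roughly:
 *
 *	for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
 *		if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
 *			chid = gr->chid_tlb[i].chid;
 *			tsgid = gr->chid_tlb[i].tsgid;
 *			break;
 *		}
 *	}
 *
 * Lookups and refills are serialized under gr->ch_tlb_lock (see
 * struct gr_gk20a below).
 */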

struct gr_zcull_gk20a {
	u32 aliquot_width;
	u32 aliquot_height;
	u32 aliquot_size;
	u32 total_aliquots;

	u32 width_align_pixels;
	u32 height_align_pixels;
	u32 pixel_squares_by_aliquots;
};

struct gr_zcull_info {
	u32 width_align_pixels;
	u32 height_align_pixels;
	u32 pixel_squares_by_aliquots;
	u32 aliquot_total;
	u32 region_byte_multiplier;
	u32 region_header_size;
	u32 subregion_header_size;
	u32 subregion_width_align_pixels;
	u32 subregion_height_align_pixels;
	u32 subregion_count;
};

#define GK20A_ZBC_COLOR_VALUE_SIZE	4 /* RGBA */

#define GK20A_STARTOF_ZBC_TABLE	1 /* index zero reserved to indicate "not ZBCd" */
#define GK20A_SIZEOF_ZBC_TABLE	16 /* match ltcs_ltss_dstg_zbc_index_address width (4) */
#define GK20A_ZBC_TABLE_SIZE	(16 - 1)

#define GK20A_ZBC_TYPE_INVALID	0
#define GK20A_ZBC_TYPE_COLOR	1
#define GK20A_ZBC_TYPE_DEPTH	2
#define T19X_ZBC		3
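
/*
 * Worked example: GK20A_SIZEOF_ZBC_TABLE gives 16 hardware slots, and
 * index zero is reserved to mean "not ZBCd", so GK20A_ZBC_TABLE_SIZE
 * leaves 16 - 1 = 15 usable software entries covering hardware indices
 * GK20A_STARTOF_ZBC_TABLE (1) through 15.
 */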

struct zbc_color_table {
	u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 format;
	u32 ref_cnt;
};

struct zbc_depth_table {
	u32 depth;
	u32 format;
	u32 ref_cnt;
};

struct zbc_entry {
	u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 depth;
	u32 type;	/* color or depth */
	u32 format;
};

struct zbc_query_params {
	u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 depth;
	u32 ref_cnt;
	u32 format;
	u32 type;	/* color or depth */
	u32 index_size;	/* [out] size, [in] index */
};

struct sm_info {
	u32 gpc_index;
	u32 tpc_index;
	u32 sm_index;
	u32 global_tpc_index;
};

#if defined(CONFIG_GK20A_CYCLE_STATS)
struct gk20a_cs_snapshot_client;
struct gk20a_cs_snapshot;
#endif

struct gr_gk20a_isr_data {
	u32 addr;
	u32 data_lo;
	u32 data_hi;
	u32 curr_ctx;
	u32 chid;
	u32 offset;
	u32 sub_chan;
	u32 class_num;
};

struct gr_ctx_buffer_desc {
	void (*destroy)(struct gk20a *, struct gr_ctx_buffer_desc *);
	struct nvgpu_mem mem;
	void *priv;
};

struct nvgpu_preemption_modes_rec {
	u32 graphics_preemption_mode_flags; /* supported preemption modes */
	u32 compute_preemption_mode_flags;  /* supported preemption modes */

	u32 default_graphics_preempt_mode;  /* default mode */
	u32 default_compute_preempt_mode;   /* default mode */
};

struct nvgpu_gr_sm_error_state {
	u32 hww_global_esr;
	u32 hww_warp_esr;
	u64 hww_warp_esr_pc;
	u32 hww_global_esr_report_mask;
	u32 hww_warp_esr_report_mask;
};

struct gr_gk20a {
	struct gk20a *g;
	struct {
		bool dynamic;

		u32 buffer_size;
		u32 buffer_total_size;

		bool golden_image_initialized;
		u32 golden_image_size;
		u32 *local_golden_image;

		u32 hwpm_ctxsw_buffer_offset_map_count;
		struct ctxsw_buf_offset_map_entry *hwpm_ctxsw_buffer_offset_map;

		u32 zcull_ctxsw_image_size;

		u32 pm_ctxsw_image_size;

		u32 buffer_header_size;

		u32 priv_access_map_size;

		struct gr_ucode_gk20a ucode;

		struct av_list_gk20a sw_bundle_init;
		struct av_list_gk20a sw_method_init;
		struct aiv_list_gk20a sw_ctx_load;
		struct av_list_gk20a sw_non_ctx_load;
		struct av_list_gk20a sw_veid_bundle_init;
		struct {
			struct aiv_list_gk20a sys;
			struct aiv_list_gk20a gpc;
			struct aiv_list_gk20a tpc;
			struct aiv_list_gk20a zcull_gpc;
			struct aiv_list_gk20a ppc;
			struct aiv_list_gk20a pm_sys;
			struct aiv_list_gk20a pm_gpc;
			struct aiv_list_gk20a pm_tpc;
			struct aiv_list_gk20a pm_ppc;
			struct aiv_list_gk20a perf_sys;
			struct aiv_list_gk20a perf_gpc;
			struct aiv_list_gk20a fbp;
			struct aiv_list_gk20a fbp_router;
			struct aiv_list_gk20a gpc_router;
			struct aiv_list_gk20a pm_ltc;
			struct aiv_list_gk20a pm_fbpa;
			struct aiv_list_gk20a perf_sys_router;
			struct aiv_list_gk20a perf_pma;
			struct aiv_list_gk20a pm_rop;
			struct aiv_list_gk20a pm_ucgpc;
			struct aiv_list_gk20a etpc;
		} ctxsw_regs;
		int regs_base_index;
		bool valid;
	} ctx_vars;

	struct nvgpu_mutex ctx_mutex; /* protect golden ctx init */
	struct nvgpu_mutex fecs_mutex; /* protect fecs method */

#define GR_NETLIST_DYNAMIC	-1
#define GR_NETLIST_STATIC_A	'A'
	int netlist;

	struct nvgpu_cond init_wq;
	int initialized;

	u32 num_fbps;

	u32 comptags_per_cacheline;
	u32 slices_per_ltc;
	u32 cacheline_size;
	u32 gobs_per_comptagline_per_slice;

	u32 max_gpc_count;
	u32 max_fbps_count;
	u32 max_tpc_per_gpc_count;
	u32 max_zcull_per_gpc_count;
	u32 max_tpc_count;

	u32 sys_count;
	u32 gpc_count;
	u32 pe_count_per_gpc;
	u32 ppc_count;
	u32 *gpc_ppc_count;
	u32 tpc_count;
	u32 *gpc_tpc_count;
	u32 *gpc_tpc_mask;
	u32 zcb_count;
	u32 *gpc_zcb_count;
	u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
	u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
	u32 *gpc_skip_mask;

	u32 bundle_cb_default_size;
	u32 min_gpm_fifo_depth;
	u32 bundle_cb_token_limit;
	u32 attrib_cb_default_size;
	u32 attrib_cb_size;
	u32 alpha_cb_default_size;
	u32 alpha_cb_size;
	u32 timeslice_mode;
	u32 czf_bypass;
	u32 pd_max_batches;
	u32 gfxp_wfi_timeout_count;

	struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];

	struct nvgpu_mem mmu_wr_mem;
	struct nvgpu_mem mmu_rd_mem;

	u8 *map_tiles;
	u32 map_tile_count;
	u32 map_row_offset;

	u32 max_comptag_mem; /* max memory size (MB) for comptag */
	struct compbit_store_desc compbit_store;
	struct gk20a_comptag_allocator comp_tags;

	struct gr_zcull_gk20a zcull;

	struct nvgpu_mutex zbc_lock;
	struct zbc_color_table zbc_col_tbl[GK20A_ZBC_TABLE_SIZE];
	struct zbc_depth_table zbc_dep_tbl[GK20A_ZBC_TABLE_SIZE];
#ifdef CONFIG_TEGRA_19x_GPU
	struct zbc_s_table zbc_s_tbl[GK20A_ZBC_TABLE_SIZE];
#endif
	s32 max_default_color_index;
	s32 max_default_depth_index;
#ifdef CONFIG_TEGRA_19x_GPU
	s32 max_default_s_index;
#endif

	u32 max_used_color_index;
	u32 max_used_depth_index;
#ifdef CONFIG_TEGRA_19x_GPU
	u32 max_used_s_index;
#endif

#define GR_CHANNEL_MAP_TLB_SIZE	2 /* must be a power of 2 */
	struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
	u32 channel_tlb_flush_index;
	struct nvgpu_spinlock ch_tlb_lock;

	void (*remove_support)(struct gr_gk20a *gr);
	bool sw_ready;
	bool skip_ucode_init;

#ifdef CONFIG_ARCH_TEGRA_18x_SOC
	struct gr_t18x t18x;
#endif

	u32 fbp_en_mask;
	u32 *fbp_rop_l2_en_mask;
	u32 no_of_sm;
	struct sm_info *sm_to_cluster;
	struct nvgpu_gr_sm_error_state *sm_error_states;
#if defined(CONFIG_GK20A_CYCLE_STATS)
	struct nvgpu_mutex cs_lock;
	struct gk20a_cs_snapshot *cs_data;
#endif
	u32 max_css_buffer_size;
};

void gk20a_fecs_dump_falcon_stats(struct gk20a *g);

struct gr_ctx_desc {
	struct nvgpu_mem mem;

	u32 graphics_preempt_mode;
	u32 compute_preempt_mode;
	bool boosted_ctx;
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
	struct gr_ctx_desc_t18x t18x;
#endif
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
	u64 virt_ctx;
#endif
};

struct ctx_header_desc {
	struct nvgpu_mem mem;
};

struct gk20a_ctxsw_ucode_segment {
	u32 offset;
	u32 size;
};

struct gk20a_ctxsw_ucode_segments {
	u32 boot_entry;
	u32 boot_imem_offset;
	u32 boot_signature;
	struct gk20a_ctxsw_ucode_segment boot;
	struct gk20a_ctxsw_ucode_segment code;
	struct gk20a_ctxsw_ucode_segment data;
};

/*
 * Sums over the ucode files, taken as sequences of u32 and compared
 * against the boot_signature field in the structure above.
 */

/*
 * T18X FECS remains the same as T21X, so
 * FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED is used for T18X.
 */
#define FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED	0x68edab34
#define FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE	0x9121ab5c
#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED	0x9125ab5c
#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED	0x8a621f78
#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED	0x67e5344b
#define FALCON_UCODE_SIG_T12X_FECS_OLDER		0x56da09f

#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED	0x3d3d65e2
#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED	0x303465d5
#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED	0x3fdd33d3
#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER		0x53d7877

#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED	0x93671b7d
#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2	0x4d6cbc10

#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED	0x393161da
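
/*
 * Illustrative sketch (not part of this header): per the comment above,
 * the signatures are plain u32 sums over the ucode image, along the
 * lines of:
 *
 *	static u32 ucode_checksum(const u32 *buf, size_t words)
 *	{
 *		u32 sum = 0;
 *		size_t i;
 *
 *		for (i = 0; i < words; i++)
 *			sum += buf[i];
 *		return sum;
 *	}
 *
 * ucode_checksum() is a hypothetical helper; the actual comparison
 * against boot_signature lives in the ucode loading code.
 */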

struct gk20a_ctxsw_ucode_info {
	u64 *p_va;
	struct nvgpu_mem inst_blk_desc;
	struct nvgpu_mem surface_desc;
	struct gk20a_ctxsw_ucode_segments fecs;
	struct gk20a_ctxsw_ucode_segments gpccs;
};

struct gk20a_ctxsw_bootloader_desc {
	u32 start_offset;
	u32 size;
	u32 imem_offset;
	u32 entry_point;
};

struct fecs_method_op_gk20a {
	struct {
		u32 addr;
		u32 data;
	} method;

	struct {
		u32 id;
		u32 data;
		u32 clr;
		u32 *ret;
		u32 ok;
		u32 fail;
	} mailbox;

	struct {
		u32 ok;
		u32 fail;
	} cond;
};
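
/*
 * Illustrative sketch (not part of this header): a FECS method is
 * submitted by filling in a struct fecs_method_op_gk20a and handing it
 * to gr_gk20a_submit_fecs_method_op(), declared below. Values here are
 * placeholders:
 *
 *	struct fecs_method_op_gk20a op = {
 *		.method.addr = method_addr,
 *		.method.data = method_data,
 *		.mailbox = { .id = 0, .data = 0, .clr = ~0, .ret = NULL,
 *			     .ok = 1, .fail = 2 },
 *		.cond.ok = GR_IS_UCODE_OP_EQUAL,
 *		.cond.fail = GR_IS_UCODE_OP_EQUAL,
 *	};
 *
 *	err = gr_gk20a_submit_fecs_method_op(g, op, true);
 *
 * The cond.ok/cond.fail fields take the GR_IS_UCODE_OP_* codes defined
 * earlier; mailbox.ok/.fail are the values compared under those ops.
 */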

struct nvgpu_warpstate {
	u64 valid_warps[2];
	u64 trapped_warps[2];
	u64 paused_warps[2];
};

struct gpu_ops;
int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
	struct channel_gk20a *c);
void gk20a_init_gr(struct gk20a *g);
int gk20a_init_gr_support(struct gk20a *g);
int gk20a_enable_gr_hw(struct gk20a *g);
int gk20a_gr_reset(struct gk20a *g);
void gk20a_gr_wait_initialized(struct gk20a *g);

int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a);

int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr);

int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags);
void gk20a_free_channel_ctx(struct channel_gk20a *c, bool is_tsg);

int gk20a_gr_isr(struct gk20a *g);
int gk20a_gr_nonstall_isr(struct gk20a *g);

/* zcull */
u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr);
int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
	struct channel_gk20a *c, u64 zcull_va, u32 mode);
int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
	struct gr_zcull_info *zcull_params);
void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
	u32 *zcull_map_tiles);
/* zbc */
int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
	struct zbc_entry *zbc_val);
int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
	struct zbc_query_params *query_params);
int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
	struct zbc_entry *zbc_val);
int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr);
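
/*
 * Illustrative sketch (not part of this header): adding a color ZBC
 * entry means filling a struct zbc_entry and calling gr_gk20a_add_zbc()
 * above. Values here are placeholders:
 *
 *	struct zbc_entry ent = { .type = GK20A_ZBC_TYPE_COLOR,
 *				 .format = fmt };
 *	for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
 *		ent.color_ds[i] = ds_val[i];
 *		ent.color_l2[i] = l2_val[i];
 *	}
 *	err = gr_gk20a_add_zbc(g, &g->gr, &ent);
 */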

/* pmu */
int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size);
int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g,
	struct nvgpu_mem *inst_block);
int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va);

void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine);
void gr_gk20a_init_blcg_mode(struct gk20a *g, u32 mode, u32 engine);

void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config);

/* sm */
bool gk20a_gr_sm_debugger_attached(struct gk20a *g);
u32 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g);

#define gr_gk20a_elpg_protected_call(g, func) \
	({ \
		int err = 0; \
		if (g->support_pmu && g->elpg_enabled) { \
			err = nvgpu_pmu_disable_elpg(g); \
			if (err) \
				nvgpu_pmu_enable_elpg(g); \
		} \
		if (!err) { \
			err = func; \
			if (g->support_pmu && g->elpg_enabled) \
				nvgpu_pmu_enable_elpg(g); \
		} \
		err; \
	})
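
/*
 * Illustrative sketch (not part of this header): the macro above runs
 * an int-valued expression with ELPG disabled and re-enables ELPG
 * afterwards, e.g.:
 *
 *	err = gr_gk20a_elpg_protected_call(g,
 *		gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false));
 *
 * Note that "func" is an expression evaluated exactly once inside the
 * macro, not a function pointer.
 */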

int gk20a_gr_suspend(struct gk20a *g);

struct nvgpu_dbg_gpu_reg_op;
int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
	struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
	u32 num_ctx_wr_ops, u32 num_ctx_rd_ops);
int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
	struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
	u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
	bool ch_is_curr_ctx);
int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
	u32 addr,
	u32 max_offsets,
	u32 *offsets, u32 *offset_addrs,
	u32 *num_offsets,
	bool is_quad, u32 quad);
int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
	u32 addr,
	u32 max_offsets,
	u32 *offsets, u32 *offset_addrs,
	u32 *num_offsets);
int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
	struct channel_gk20a *c,
	bool enable_smpc_ctxsw);
int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
	struct channel_gk20a *c,
	bool enable_hwpm_ctxsw);

struct channel_ctx_gk20a;
void gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx,
	u32 addr, u32 data, bool patch);
int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
	struct channel_ctx_gk20a *ch_ctx,
	bool update_patch_count);
void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
	struct channel_ctx_gk20a *ch_ctx,
	bool update_patch_count);
void gr_gk20a_commit_global_pagepool(struct gk20a *g,
	struct channel_ctx_gk20a *ch_ctx,
	u64 addr, u32 size, bool patch);
void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
int gr_gk20a_init_fs_state(struct gk20a *g);
int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
int gr_gk20a_init_ctxsw_ucode(struct gk20a *g);
int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g);
void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);

void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c);
int gr_gk20a_disable_ctxsw(struct gk20a *g);
int gr_gk20a_enable_ctxsw(struct gk20a *g);
void gk20a_gr_resume_single_sm(struct gk20a *g,
	u32 gpc, u32 tpc, u32 sm);
void gk20a_gr_resume_all_sms(struct gk20a *g);
void gk20a_gr_suspend_single_sm(struct gk20a *g,
	u32 gpc, u32 tpc, u32 sm,
	u32 global_esr_mask, bool check_errors);
void gk20a_gr_suspend_all_sms(struct gk20a *g,
	u32 global_esr_mask, bool check_errors);
u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index);
int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
	struct channel_gk20a *ch, u64 sms, bool enable);
bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
	struct zbc_entry *color_val, u32 index);
int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
	struct zbc_entry *depth_val, u32 index);
int _gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
	struct zbc_entry *zbc_val);
void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries);
int gr_gk20a_wait_idle(struct gk20a *g, unsigned long duration_ms,
	u32 expect_delay);
int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
	bool *post_event, struct channel_gk20a *fault_ch,
	u32 *hww_global_esr);
int gr_gk20a_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
	bool *post_event);
int gr_gk20a_init_ctx_state(struct gk20a *g);
int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
	struct fecs_method_op_gk20a op,
	bool sleepduringwait);
int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
	struct fecs_method_op_gk20a op);
int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
	struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm,
	u32 class, u32 padding);
void gr_gk20a_free_gr_ctx(struct gk20a *g,
	struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx);
int gr_gk20a_halt_pipe(struct gk20a *g);

#if defined(CONFIG_GK20A_CYCLE_STATS)
int gr_gk20a_css_attach(struct channel_gk20a *ch,	/* in - main hw structure */
	u32 dmabuf_fd,			/* in - dma mapped memory */
	u32 perfmon_id_count,		/* in - number of perfmons */
	u32 *perfmon_id_start,		/* out - index of first pm */
	/* out - pointer to client data used later */
	struct gk20a_cs_snapshot_client **css_client);

int gr_gk20a_css_detach(struct channel_gk20a *ch,
	struct gk20a_cs_snapshot_client *css_client);
int gr_gk20a_css_flush(struct channel_gk20a *ch,
	struct gk20a_cs_snapshot_client *css_client);

void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g);

#else
/* stub cleanup function when cyclestats snapshots are disabled */
static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
{
	(void)g;
}
#endif

int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
	struct gr_gk20a_isr_data *isr_data);
int gk20a_gr_lock_down_sm(struct gk20a *g,
	u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
	bool check_errors);
int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
	u32 global_esr_mask, bool check_errors);
int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
	u32 *mailbox_ret, u32 opc_success,
	u32 mailbox_ok, u32 opc_fail,
	u32 mailbox_fail, bool sleepduringwait);

int gr_gk20a_get_ctx_id(struct gk20a *g,
	struct channel_gk20a *c,
	u32 *ctx_id);

u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);

int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long duration_ms,
	u32 expect_delay);

struct dbg_session_gk20a;

bool gr_gk20a_suspend_context(struct channel_gk20a *ch);
bool gr_gk20a_resume_context(struct channel_gk20a *ch);
int gr_gk20a_suspend_contexts(struct gk20a *g,
	struct dbg_session_gk20a *dbg_s,
	int *ctx_resident_ch_fd);
int gr_gk20a_resume_contexts(struct gk20a *g,
	struct dbg_session_gk20a *dbg_s,
	int *ctx_resident_ch_fd);
void gk20a_gr_enable_gpc_exceptions(struct gk20a *g);
void gk20a_gr_enable_exceptions(struct gk20a *g);
int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch);
int gr_gk20a_trigger_suspend(struct gk20a *g);
int gr_gk20a_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state);
int gr_gk20a_resume_from_pause(struct gk20a *g);
int gr_gk20a_clear_sm_errors(struct gk20a *g);
u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);

int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c);

void gr_gk20a_init_sm_id_table(struct gk20a *g);

int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);

void gr_gk20a_write_zcull_ptr(struct gk20a *g,
	struct nvgpu_mem *mem, u64 gpu_va);

void gr_gk20a_write_pm_ptr(struct gk20a *g,
	struct nvgpu_mem *mem, u64 gpu_va);

u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc);
u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc);
void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
	u32 *esr_sm_sel);
void gk20a_gr_init_ovr_sm_dsm_perf(void);
void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
	u32 **ovr_perf_regs);
void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
	struct nvgpu_mem *mem);
u32 gr_gk20a_get_patch_slots(struct gk20a *g);

#endif /* GR_GK20A_H */