From 4b5b67d6d83430d8d670660b1dfc9cf024d60d88 Mon Sep 17 00:00:00 2001
From: Sunny He
Date: Thu, 17 Aug 2017 16:11:34 -0700
Subject: gpu: nvgpu: Reorg gr HAL initialization

Reorganize HAL initialization to remove inheritance and construct
the gpu_ops struct at compile time. This patch only covers the gr
sub-module of the gpu_ops struct.

Perform HAL function assignments in hal_gxxxx.c through the
population of a chip-specific copy of gpu_ops.

Jira NVGPU-74

Change-Id: Ie37638f442fd68aca8a7ade5f297118447bdc91e
Signed-off-by: Sunny He
Reviewed-on: https://git-master.nvidia.com/r/1542989
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: svc-mobile-coverity
Reviewed-by: svccoveritychecker
Reviewed-by: Terje Bergstrom
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c  | 136 +++++++++++------------------------
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h  |  86 ++++++++++++++++++++++-
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 129 +++++++++++++++++++++++++++++++++-
 3 files changed, 253 insertions(+), 98 deletions(-)

(limited to 'drivers/gpu/nvgpu/gp10b')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 05fbeb21..74af9817 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -41,7 +41,7 @@
 
 #define NVGPU_GFXP_WFI_TIMEOUT_US 100LL
 
-static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
+bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
@@ -67,7 +67,7 @@ static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 	return valid;
 }
 
-static bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
+bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
 {
 	if (class_num == PASCAL_A || class_num == MAXWELL_B)
 		return true;
@@ -75,7 +75,7 @@ static bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
 	return false;
 }
 
-static bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num)
+bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num)
 {
 	if (class_num == PASCAL_COMPUTE_A || class_num == MAXWELL_COMPUTE_B)
 		return true;
@@ -119,7 +119,7 @@ static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err,
 	*count_to_adjust = 0;
 }
 
-static int gr_gp10b_handle_sm_exception(struct gk20a *g,
+int gr_gp10b_handle_sm_exception(struct gk20a *g,
 		u32 gpc, u32 tpc, u32 sm,
 		bool *post_event, struct channel_gk20a *fault_ch,
 		u32 *hww_global_esr)
@@ -244,7 +244,7 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g,
 	return ret;
 }
 
-static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
+int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
 		bool *post_event)
 {
 	int ret = 0;
@@ -380,7 +380,7 @@ static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
 	return ret;
 }
 
-static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
+int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 			struct channel_gk20a *c, bool patch)
 {
 	struct gr_gk20a *gr = &g->gr;
@@ -481,7 +481,7 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 	return 0;
 }
 
-static void gr_gp10b_commit_global_pagepool(struct gk20a *g,
+void gr_gp10b_commit_global_pagepool(struct gk20a *g,
 					    struct channel_ctx_gk20a *ch_ctx,
 					    u64 addr, u32 size, bool patch)
 {
@@ -499,7 +499,7 @@ static void gr_gp10b_commit_global_pagepool(struct gk20a *g,
 		gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
 }
 
-static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
+int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
 			   struct zbc_entry *color_val, u32 index)
 {
 	u32 i;
@@ -554,7 +554,7 @@ static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
 	return 0;
 }
 
-static int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
+int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
 			   struct zbc_entry *depth_val, u32 index)
 {
 	u32 zbc_z;
@@ -592,12 +592,12 @@ static int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
 	return 0;
 }
 
-static u32 gr_gp10b_pagepool_default_size(struct gk20a *g)
+u32 gr_gp10b_pagepool_default_size(struct gk20a *g)
 {
 	return gr_scc_pagepool_total_pages_hwmax_value_v();
 }
 
-static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
+int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
 {
 	struct gr_gk20a *gr = &g->gr;
 	int size;
@@ -642,7 +642,7 @@ static void gr_gp10b_set_coalesce_buffer_size(struct gk20a *g, u32 data)
 	gk20a_dbg_fn("done");
 }
 
-static void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data)
+void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data)
 {
 	u32 val;
 
@@ -667,7 +667,7 @@ static void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data)
 	gk20a_writel(g, gr_bes_crop_debug3_r(), val);
 }
 
-static int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
+int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
 				  u32 class_num, u32 offset, u32 data)
 {
 	gk20a_dbg_fn("");
@@ -718,7 +718,7 @@ fail:
 	return -EINVAL;
 }
 
-static void gr_gp10b_cb_size_default(struct gk20a *g)
+void gr_gp10b_cb_size_default(struct gk20a *g)
 {
 	struct gr_gk20a *gr = &g->gr;
 
@@ -728,7 +728,7 @@ static void gr_gp10b_cb_size_default(struct gk20a *g)
 		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
 }
 
-static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
+void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 {
 	struct gr_gk20a *gr = &g->gr;
 	u32 gpc_index, ppc_index, stride, val;
@@ -776,7 +776,7 @@ static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 	}
 }
 
-static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
+void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
 {
 	struct gr_gk20a *gr = &g->gr;
 	u32 gpc_index, ppc_index, stride, val;
@@ -843,7 +843,7 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
 	}
 }
 
-static int gr_gp10b_init_ctx_state(struct gk20a *g)
+int gr_gp10b_init_ctx_state(struct gk20a *g)
 {
 	struct fecs_method_op_gk20a op = {
 		.mailbox = { .id = 0, .data = 0,
@@ -910,7 +910,7 @@ fail_free:
 	return err;
 }
 
-static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
+int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
 				struct gr_ctx_desc *gr_ctx,
 				struct vm_gk20a *vm, u32 class,
 				u32 graphics_preempt_mode,
@@ -1034,7 +1034,7 @@ fail:
 	return err;
 }
 
-static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
+int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 			  struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
 			  u32 class,
 			  u32 flags)
@@ -1131,7 +1131,7 @@ static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
 	nvgpu_mem_end(g, mem);
 }
 
-static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
+void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 			  struct gr_ctx_desc *gr_ctx)
 {
 	gk20a_dbg_fn("");
@@ -1151,7 +1151,7 @@ static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 
 }
 
-static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
+void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx,
 		struct nvgpu_mem *mem)
 {
@@ -1256,7 +1256,7 @@ out:
 	gk20a_dbg_fn("done");
 }
 
-static int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
+int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
 			   struct gk20a_debug_output *o)
 {
 	struct gr_gk20a *gr = &g->gr;
@@ -1402,7 +1402,7 @@ static bool gr_activity_empty_or_preempted(u32 val)
 	return true;
 }
 
-static int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
+int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
 		       u32 expect_delay)
 {
 	u32 delay = expect_delay;
@@ -1453,7 +1453,7 @@ static int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
 	return -EAGAIN;
 }
 
-static void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
+void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
 					    struct channel_ctx_gk20a *ch_ctx,
 					    u64 addr, bool patch)
 {
@@ -1481,7 +1481,7 @@ static void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
 		gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
 }
 
-static void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
+void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
 					    struct channel_ctx_gk20a *ch_ctx,
 					    u64 addr, u64 size, bool patch)
 {
@@ -1516,7 +1516,7 @@ static void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
 		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
 }
 
-static int gr_gp10b_load_smid_config(struct gk20a *g)
+int gr_gp10b_load_smid_config(struct gk20a *g)
 {
 	u32 *tpc_sm_id;
 	u32 i, j;
@@ -1586,7 +1586,7 @@ int gr_gp10b_init_fs_state(struct gk20a *g)
 	return gr_gm20b_init_fs_state(g);
 }
 
-static void gr_gp10b_init_cyclestats(struct gk20a *g)
+void gr_gp10b_init_cyclestats(struct gk20a *g)
 {
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	g->gpu_characteristics.flags |=
@@ -1598,7 +1598,7 @@ static void gr_gp10b_init_cyclestats(struct gk20a *g)
 #endif
 }
 
-static void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
+void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 {
 	nvgpu_tegra_fuse_write_bypass(g, 0x1);
 	nvgpu_tegra_fuse_write_access_sw(g, 0x0);
@@ -1611,7 +1611,7 @@ static void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 	nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0);
 }
 
-static void gr_gp10b_get_access_map(struct gk20a *g,
+void gr_gp10b_get_access_map(struct gk20a *g,
 				   u32 **whitelist, int *num_entries)
 {
 	static u32 wl_addr_gp10b[] = {
@@ -1801,7 +1801,7 @@ static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g,
  *
  * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing
  */
-static int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
+int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
 		u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr,
 		bool sm_debugger_attached, struct channel_gk20a *fault_ch,
 		bool *early_exit, bool *ignore_debugger)
@@ -1988,7 +1988,7 @@ clean_up:
 	return gk20a_gr_handle_fecs_error(g, __ch, isr_data);
 }
 
-static u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g,
+u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g,
 			u32 gpc, u32 tpc, u32 sm)
 {
 	u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
@@ -2003,7 +2003,7 @@ static u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g,
 	return hww_warp_esr;
 }
 
-static u32 get_ecc_override_val(struct gk20a *g)
+u32 get_ecc_override_val(struct gk20a *g)
 {
 	u32 val;
 
@@ -2046,7 +2046,7 @@ static bool gr_gp10b_suspend_context(struct channel_gk20a *ch,
 	return ctx_resident;
 }
 
-static int gr_gp10b_suspend_contexts(struct gk20a *g,
+int gr_gp10b_suspend_contexts(struct gk20a *g,
 			      struct dbg_session_gk20a *dbg_s,
 			      int *ctx_resident_ch_fd)
 {
@@ -2122,7 +2122,7 @@ clean_up:
 	return err;
 }
 
-static int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
+int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
 				    bool boost)
 {
 	struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx;
@@ -2156,7 +2156,7 @@ unmap_ctx:
 	return err;
 }
 
-static void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
+void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
 				       struct gr_ctx_desc *gr_ctx) {
 	u32 v;
 
@@ -2165,7 +2165,7 @@ static void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
 	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pmu_options_o(), v);
 }
 
-static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
+int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 					u32 graphics_preempt_mode,
 					u32 compute_preempt_mode)
 {
@@ -2261,7 +2261,7 @@ unamp_ctx_header:
 	return err;
 }
 
-static int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
+int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
 		struct nvgpu_preemption_modes_rec *preemption_modes_rec)
 {
 	preemption_modes_rec->graphics_preemption_mode_flags = (
@@ -2279,7 +2279,7 @@ static int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
 	return 0;
 }
 
-static int gp10b_gr_fuse_override(struct gk20a *g)
+int gp10b_gr_fuse_override(struct gk20a *g)
 {
 	struct device_node *np = dev_from_gk20a(g)->of_node;
 	u32 *fuses;
@@ -2319,7 +2319,7 @@ static int gp10b_gr_fuse_override(struct gk20a *g)
 	return 0;
 }
 
-static int gr_gp10b_init_preemption_state(struct gk20a *g)
+int gr_gp10b_init_preemption_state(struct gk20a *g)
 {
 	u32 debug_2;
 	u64 sysclk_rate;
@@ -2341,7 +2341,7 @@ static int gr_gp10b_init_preemption_state(struct gk20a *g)
 	return 0;
 }
 
-static void gr_gp10b_set_preemption_buffer_va(struct gk20a *g,
+void gr_gp10b_set_preemption_buffer_va(struct gk20a *g,
 			struct nvgpu_mem *mem, u64 gpu_va)
 {
 	u32 va = u64_lo32(gpu_va >> 8);
@@ -2367,59 +2367,3 @@ int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch)
 
 	return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false);
 }
-
-void gp10b_init_gr(struct gk20a *g)
-{
-	struct gpu_ops *gops = &g->ops;
-
-	gm20b_init_gr(g);
-	gops->gr.init_fs_state = gr_gp10b_init_fs_state;
-	gops->gr.init_preemption_state = gr_gp10b_init_preemption_state;
-	gops->gr.is_valid_class = gr_gp10b_is_valid_class;
-	gops->gr.is_valid_gfx_class = gr_gp10b_is_valid_gfx_class;
-	gops->gr.is_valid_compute_class = gr_gp10b_is_valid_compute_class;
-	gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager;
-	gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
-	gops->gr.set_preemption_buffer_va = gr_gp10b_set_preemption_buffer_va;
-	gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;
-	gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;
-	gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size;
-	gops->gr.calc_global_ctx_buffer_size =
-		gr_gp10b_calc_global_ctx_buffer_size;
-	gops->gr.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb;
-	gops->gr.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb;
-	gops->gr.handle_sw_method = gr_gp10b_handle_sw_method;
-	gops->gr.cb_size_default = gr_gp10b_cb_size_default;
-	gops->gr.set_alpha_circular_buffer_size =
-		gr_gp10b_set_alpha_circular_buffer_size;
-	gops->gr.set_circular_buffer_size =
-		gr_gp10b_set_circular_buffer_size;
-	gops->gr.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3;
-	gops->gr.init_ctx_state = gr_gp10b_init_ctx_state;
-	gops->gr.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx;
-	gops->gr.free_gr_ctx = gr_gp10b_free_gr_ctx;
-	gops->gr.update_ctxsw_preemption_mode =
-		gr_gp10b_update_ctxsw_preemption_mode;
-	gops->gr.dump_gr_regs = gr_gp10b_dump_gr_status_regs;
-	gops->gr.wait_empty = gr_gp10b_wait_empty;
-	gops->gr.init_cyclestats = gr_gp10b_init_cyclestats;
-	gops->gr.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask;
-	gops->gr.get_access_map = gr_gp10b_get_access_map;
-	gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception;
-	gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception;
-	gops->gr.pre_process_sm_exception =
-		gr_gp10b_pre_process_sm_exception;
-	gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error;
-	gops->gr.create_gr_sysfs = gr_gp10b_create_sysfs;
-	gops->gr.get_lrf_tex_ltc_dram_override = get_ecc_override_val;
-	gops->gr.suspend_contexts = gr_gp10b_suspend_contexts;
-	gops->gr.set_preemption_mode = gr_gp10b_set_preemption_mode;
-	gops->gr.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode;
-	gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags;
-	gops->gr.fuse_override = gp10b_gr_fuse_override;
-	gops->gr.load_smid_config = gr_gp10b_load_smid_config;
-	gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx;
-	gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx;
-	gops->gr.set_czf_bypass = gr_gp10b_set_czf_bypass;
-	gops->gr.get_sm_hww_warp_esr = gp10b_gr_get_sm_hww_warp_esr;
-}
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index ac53e231..ce1ca01f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -20,6 +20,10 @@
 
 struct gk20a;
 struct gr_gk20a_isr_data;
+struct channel_ctx_gk20a;
+struct zbc_entry;
+struct gr_ctx_desc;
+struct nvgpu_preemption_modes_rec;
 
 enum {
 	PASCAL_CHANNEL_GPFIFO_A = 0xC06F,
@@ -39,7 +43,6 @@ enum {
 #define NVC0C0_SET_SHADER_EXCEPTIONS	0x1528
 #define NVC0C0_SET_RD_COALESCE		0x0228
 
-void gp10b_init_gr(struct gk20a *g);
 int gr_gp10b_init_fs_state(struct gk20a *g);
 int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
 			struct nvgpu_mem *mem);
@@ -50,6 +53,87 @@ int gr_gp10b_handle_fecs_error(struct gk20a *g,
 int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g,
 		struct channel_gk20a *fault_ch);
 
+bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num);
+bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num);
+bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num);
+int gr_gp10b_handle_sm_exception(struct gk20a *g,
+		u32 gpc, u32 tpc, u32 sm,
+		bool *post_event, struct channel_gk20a *fault_ch,
+		u32 *hww_global_esr);
+int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
+		bool *post_event);
+int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
+			struct channel_gk20a *c, bool patch);
+void gr_gp10b_commit_global_pagepool(struct gk20a *g,
+				     struct channel_ctx_gk20a *ch_ctx,
+				     u64 addr, u32 size, bool patch);
+int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
+			   struct zbc_entry *color_val, u32 index);
+int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
+			   struct zbc_entry *depth_val, u32 index);
+u32 gr_gp10b_pagepool_default_size(struct gk20a *g);
+int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g);
+void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data);
+int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
+			      u32 class_num, u32 offset, u32 data);
+void gr_gp10b_cb_size_default(struct gk20a *g);
+void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
+void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data);
+int gr_gp10b_init_ctx_state(struct gk20a *g);
+int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
+				struct gr_ctx_desc *gr_ctx,
+				struct vm_gk20a *vm, u32 class,
+				u32 graphics_preempt_mode,
+				u32 compute_preempt_mode);
+int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
+			  struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
+			  u32 class,
+			  u32 flags);
+void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
+			  struct gr_ctx_desc *gr_ctx);
+void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
+		struct channel_ctx_gk20a *ch_ctx,
+		struct nvgpu_mem *mem);
+int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
+			   struct gk20a_debug_output *o);
+int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
+			u32 expect_delay);
+void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
+				      struct channel_ctx_gk20a *ch_ctx,
+				      u64 addr, bool patch);
+void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
+				      struct channel_ctx_gk20a *ch_ctx,
+				      u64 addr, u64 size, bool patch);
+int gr_gp10b_load_smid_config(struct gk20a *g);
+void gr_gp10b_init_cyclestats(struct gk20a *g);
+void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
+void gr_gp10b_get_access_map(struct gk20a *g,
+			     u32 **whitelist, int *num_entries);
+int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
+		u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr,
+		bool sm_debugger_attached, struct channel_gk20a *fault_ch,
+		bool *early_exit, bool *ignore_debugger);
+u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g,
+			u32 gpc, u32 tpc, u32 sm);
+u32 get_ecc_override_val(struct gk20a *g);
+int gr_gp10b_suspend_contexts(struct gk20a *g,
+			      struct dbg_session_gk20a *dbg_s,
+			      int *ctx_resident_ch_fd);
+int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
+			     bool boost);
+void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
+				 struct gr_ctx_desc *gr_ctx);
+int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
+				 u32 graphics_preempt_mode,
+				 u32 compute_preempt_mode);
+int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
+		struct nvgpu_preemption_modes_rec *preemption_modes_rec);
+int gp10b_gr_fuse_override(struct gk20a *g);
+int gr_gp10b_init_preemption_state(struct gk20a *g);
+void gr_gp10b_set_preemption_buffer_va(struct gk20a *g,
+			struct nvgpu_mem *mem, u64 gpu_va);
+int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch);
+
 struct gr_t18x {
 	struct {
 		u32 preempt_image_size;
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 69a90031..d0f07a2b 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -27,6 +27,7 @@
 #include "gk20a/mc_gk20a.h"
 #include "gk20a/fb_gk20a.h"
 #include "gk20a/pmu_gk20a.h"
+#include "gk20a/gr_gk20a.h"
 
 #include "gp10b/gr_gp10b.h"
 #include "gp10b/fecs_trace_gp10b.h"
@@ -179,6 +180,128 @@ static const struct gpu_ops gp10b_ops = {
 		.isr_stall = gp10b_ce_isr,
 		.isr_nonstall = gp10b_ce_nonstall_isr,
 	},
+	.gr = {
+		.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
+		.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
+		.cb_size_default = gr_gp10b_cb_size_default,
+		.calc_global_ctx_buffer_size =
+			gr_gp10b_calc_global_ctx_buffer_size,
+		.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb,
+		.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
+		.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
+		.commit_global_pagepool = gr_gp10b_commit_global_pagepool,
+		.handle_sw_method = gr_gp10b_handle_sw_method,
+		.set_alpha_circular_buffer_size =
+			gr_gp10b_set_alpha_circular_buffer_size,
+		.set_circular_buffer_size = gr_gp10b_set_circular_buffer_size,
+		.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions,
+		.is_valid_class = gr_gp10b_is_valid_class,
+		.is_valid_gfx_class = gr_gp10b_is_valid_gfx_class,
+		.is_valid_compute_class = gr_gp10b_is_valid_compute_class,
+		.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
+		.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
+		.init_fs_state = gr_gp10b_init_fs_state,
+		.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
+		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
+		.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
+		.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
+		.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
+		.free_channel_ctx = gk20a_free_channel_ctx,
+		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
+		.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
+		.get_zcull_info = gr_gk20a_get_zcull_info,
+		.is_tpc_addr = gr_gm20b_is_tpc_addr,
+		.get_tpc_num = gr_gm20b_get_tpc_num,
+		.detect_sm_arch = gr_gm20b_detect_sm_arch,
+		.add_zbc_color = gr_gp10b_add_zbc_color,
+		.add_zbc_depth = gr_gp10b_add_zbc_depth,
+		.zbc_set_table = gk20a_gr_zbc_set_table,
+		.zbc_query_table = gr_gk20a_query_zbc,
+		.pmu_save_zbc = gk20a_pmu_save_zbc,
+		.add_zbc = gr_gk20a_add_zbc,
+		.pagepool_default_size = gr_gp10b_pagepool_default_size,
+		.init_ctx_state = gr_gp10b_init_ctx_state,
+		.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx,
+		.free_gr_ctx = gr_gp10b_free_gr_ctx,
+		.update_ctxsw_preemption_mode =
+			gr_gp10b_update_ctxsw_preemption_mode,
+		.dump_gr_regs = gr_gp10b_dump_gr_status_regs,
+		.update_pc_sampling = gr_gm20b_update_pc_sampling,
+		.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
+		.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
+		.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
+		.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
+		.get_max_fbps_count = gr_gm20b_get_max_fbps_count,
+		.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
+		.wait_empty = gr_gp10b_wait_empty,
+		.init_cyclestats = gr_gp10b_init_cyclestats,
+		.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
+		.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
+		.bpt_reg_info = gr_gm20b_bpt_reg_info,
+		.get_access_map = gr_gp10b_get_access_map,
+		.handle_fecs_error = gr_gp10b_handle_fecs_error,
+		.handle_sm_exception = gr_gp10b_handle_sm_exception,
+		.handle_tex_exception = gr_gp10b_handle_tex_exception,
+		.enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions,
+		.enable_exceptions = gk20a_gr_enable_exceptions,
+		.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
+		.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
+		.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
+		.record_sm_error_state = gm20b_gr_record_sm_error_state,
+		.update_sm_error_state = gm20b_gr_update_sm_error_state,
+		.clear_sm_error_state = gm20b_gr_clear_sm_error_state,
+		.suspend_contexts = gr_gp10b_suspend_contexts,
+		.resume_contexts = gr_gk20a_resume_contexts,
+		.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
+		.fuse_override = gp10b_gr_fuse_override,
+		.init_sm_id_table = gr_gk20a_init_sm_id_table,
+		.load_smid_config = gr_gp10b_load_smid_config,
+		.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
+		.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr,
+		.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr,
+		.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr,
+		.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr,
+		.setup_rop_mapping = gr_gk20a_setup_rop_mapping,
+		.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
+		.commit_global_timeslice = gr_gk20a_commit_global_timeslice,
+		.commit_inst = gr_gk20a_commit_inst,
+		.write_zcull_ptr = gr_gk20a_write_zcull_ptr,
+		.write_pm_ptr = gr_gk20a_write_pm_ptr,
+		.init_elcg_mode = gr_gk20a_init_elcg_mode,
+		.load_tpc_mask = gr_gm20b_load_tpc_mask,
+		.inval_icache = gr_gk20a_inval_icache,
+		.trigger_suspend = gr_gk20a_trigger_suspend,
+		.wait_for_pause = gr_gk20a_wait_for_pause,
+		.resume_from_pause = gr_gk20a_resume_from_pause,
+		.clear_sm_errors = gr_gk20a_clear_sm_errors,
+		.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions,
+		.get_esr_sm_sel = gk20a_gr_get_esr_sm_sel,
+		.sm_debugger_attached = gk20a_gr_sm_debugger_attached,
+		.suspend_single_sm = gk20a_gr_suspend_single_sm,
+		.suspend_all_sms = gk20a_gr_suspend_all_sms,
+		.resume_single_sm = gk20a_gr_resume_single_sm,
+		.resume_all_sms = gk20a_gr_resume_all_sms,
+		.get_sm_hww_warp_esr = gp10b_gr_get_sm_hww_warp_esr,
+		.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr,
+		.get_sm_no_lock_down_hww_global_esr_mask =
+			gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask,
+		.lock_down_sm = gk20a_gr_lock_down_sm,
+		.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down,
+		.clear_sm_hww = gm20b_gr_clear_sm_hww,
+		.init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf,
+		.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs,
+		.disable_rd_coalesce = gm20a_gr_disable_rd_coalesce,
+		.set_boosted_ctx = gr_gp10b_set_boosted_ctx,
+		.set_preemption_mode = gr_gp10b_set_preemption_mode,
+		.set_czf_bypass = gr_gp10b_set_czf_bypass,
+		.pre_process_sm_exception = gr_gp10b_pre_process_sm_exception,
+		.set_preemption_buffer_va = gr_gp10b_set_preemption_buffer_va,
+		.init_preemption_state = gr_gp10b_init_preemption_state,
+		.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
+		.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
+		.create_gr_sysfs = gr_gp10b_create_sysfs,
+		.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
+	},
 	.fb = {
 		.reset = fb_gk20a_reset,
 		.init_hw = gk20a_fb_init_hw,
@@ -474,6 +597,7 @@ int gp10b_init_hal(struct gk20a *g)
 
 	gops->ltc = gp10b_ops.ltc;
 	gops->ce2 = gp10b_ops.ce2;
+	gops->gr = gp10b_ops.gr;
 	gops->fb = gp10b_ops.fb;
 	gops->clock_gating = gp10b_ops.clock_gating;
 	gops->fifo = gp10b_ops.fifo;
@@ -564,6 +688,8 @@ int gp10b_init_hal(struct gk20a *g)
 		gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode;
 		gops->pmu.is_lazy_bootstrap = gp10b_is_lazy_bootstrap;
 		gops->pmu.is_priv_load = gp10b_is_priv_load;
+
+		gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
 	} else {
 		/* Inherit from gk20a */
 		gops->pmu.is_pmu_supported = gk20a_is_pmu_supported,
@@ -574,12 +700,13 @@ int gp10b_init_hal(struct gk20a *g)
 		gops->pmu.load_lsfalcon_ucode = NULL;
 		gops->pmu.init_wpr_region = NULL;
 		gops->pmu.pmu_setup_hw_and_bootstrap = gp10b_init_pmu_setup_hw1;
+
+		gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
 	}
 
 	__nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
 	g->pmu_lsf_pmu_wpr_init_done = 0;
 	g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
-	gp10b_init_gr(g);
 	gp10b_init_uncompressed_kind_map();
 	gp10b_init_kind_attr();
 
-- 
cgit v1.2.2
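
[Editor's note] For readers unfamiliar with the pattern this commit introduces, here is a minimal standalone sketch of the before/after idea: instead of calling the previous chip's init function at probe time and then overriding individual function pointers (the removed gp10b_init_gr()), the gr sub-module is defined as a compile-time const struct and copied wholesale in the chip's init_hal routine. All names in the sketch (gr_ops, my_gpu_ops, gpu_dev, chip_*) are invented for illustration only; the real driver uses struct gpu_ops, gp10b_ops and struct gk20a as seen in the patch above, and the class number used below is a placeholder, not a real NVIDIA class ID.

/* hal_pattern_sketch.c -- illustrative only, not nvgpu code. */
#include <stdio.h>

/* Simplified stand-in for one sub-module of the HAL vtable. */
struct gr_ops {
	int (*init_fs_state)(void);
	int (*is_valid_class)(unsigned int class_num);
};

/* Simplified stand-in for struct gpu_ops. */
struct my_gpu_ops {
	struct gr_ops gr;
};

/* Simplified stand-in for the per-device struct (struct gk20a in nvgpu). */
struct gpu_dev {
	struct my_gpu_ops ops;
};

/* Chip-specific implementations. */
static int chip_init_fs_state(void)
{
	return 0;
}

static int chip_is_valid_class(unsigned int class_num)
{
	return class_num == 0x1234; /* placeholder class number */
}

/*
 * "After" pattern from the commit: the whole gr sub-module is a
 * compile-time constant, so every entry is visible in one place.
 */
static const struct my_gpu_ops chip_ops = {
	.gr = {
		.init_fs_state = chip_init_fs_state,
		.is_valid_class = chip_is_valid_class,
	},
};

/*
 * The per-chip HAL init copies the sub-module in one assignment,
 * rather than calling the previous chip's init function and then
 * overriding individual pointers ("inheritance" at runtime).
 */
static int chip_init_hal(struct gpu_dev *g)
{
	g->ops.gr = chip_ops.gr;
	return 0;
}

int main(void)
{
	struct gpu_dev g = { { { 0 } } };

	chip_init_hal(&g);
	printf("class 0x1234 valid: %d\n", g.ops.gr.is_valid_class(0x1234));
	return g.ops.gr.init_fs_state();
}

The trade-off mirrored here is the one the commit makes: runtime conditionals (such as the load_ctxsw_ucode selection in gp10b_init_hal() above) still patch individual pointers after the bulk copy, but the default wiring is now static data rather than a chain of init calls.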