From 8c6a9fd1151299697037d58f33cfa306d8ac5d87 Mon Sep 17 00:00:00 2001 From: Sam Payne Date: Fri, 31 Oct 2014 14:27:33 -0700 Subject: Revert "gpu: nvgpu: GR and LTC HAL to use const structs" This reverts commit 41b82e97164138f45fbdaef6ab6939d82ca9419e. Change-Id: Iabd01fcb124e0d22cd9be62151a6552cbb27fc94 Signed-off-by: Sam Payne Reviewed-on: http://git-master/r/592221 Tested-by: Hoang Pham GVS: Gerrit_Virtual_Submit Reviewed-by: Mitch Luban --- drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 4 +- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 13 ++- drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 3 +- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 4 +- drivers/gpu/nvgpu/gk20a/gk20a.c | 8 +- drivers/gpu/nvgpu/gk20a/gk20a.h | 114 ++++++++++++++++----- drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c | 5 +- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 174 ++++++++++++++++---------------- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 6 +- drivers/gpu/nvgpu/gk20a/gr_ops_gk20a.h | 62 ------------ drivers/gpu/nvgpu/gk20a/hal.c | 10 +- drivers/gpu/nvgpu/gk20a/ltc_common.c | 6 +- drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | 31 +++--- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 4 +- drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 4 +- drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 91 +++++++++-------- drivers/gpu/nvgpu/gm20b/gr_gm20b.h | 8 +- drivers/gpu/nvgpu/gm20b/gr_ops_gm20b.h | 79 --------------- drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 38 ++++--- drivers/gpu/nvgpu/gr_ops.h | 103 ------------------- drivers/gpu/nvgpu/nvgpu_gpuid.h | 30 ------ drivers/gpu/nvgpu/vgpu/gr_vgpu.c | 27 ++--- drivers/gpu/nvgpu/vgpu/ltc_vgpu.c | 8 +- drivers/gpu/nvgpu/vgpu/vgpu.c | 3 +- 24 files changed, 306 insertions(+), 529 deletions(-) delete mode 100644 drivers/gpu/nvgpu/gk20a/gr_ops_gk20a.h delete mode 100644 drivers/gpu/nvgpu/gm20b/gr_ops_gm20b.h delete mode 100644 drivers/gpu/nvgpu/gr_ops.h delete mode 100644 drivers/gpu/nvgpu/nvgpu_gpuid.h (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index d9fe9ef1..ee62f02a 100644 
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c @@ -50,7 +50,7 @@ static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx) } for (i = 0; i < cde_ctx->num_obj_ids; i++) - gr_gk20a_free_obj_ctx(cde_ctx->ch, + gk20a_free_obj_ctx(cde_ctx->ch, &(struct nvgpu_free_obj_ctx_args) { cde_ctx->obj_ids[i] }); @@ -401,7 +401,7 @@ static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx, alloc_obj_ctx.class_num = required_class; alloc_obj_ctx.padding = 0; - err = gr_gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx); + err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx); if (err) { gk20a_warn(&cde_ctx->pdev->dev, "cde: failed to allocate ctx. err=%d", err); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 777d7ca9..9f2e0017 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -28,7 +28,6 @@ #include "debug_gk20a.h" -#include "gr_ops.h" #include "gk20a.h" #include "dbg_gpu_gk20a.h" #include "fence_gk20a.h" @@ -639,7 +638,7 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish) gk20a_free_error_notifiers(ch); /* release channel ctx */ - g->ops.gr->free_channel_ctx(ch); + g->ops.gr.free_channel_ctx(ch); gk20a_gr_flush_channel_tlb(gr); @@ -1559,8 +1558,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, #ifdef CONFIG_DEBUG_FS /* update debug settings */ - if (g->ops.ltc->sync_debugfs) - g->ops.ltc->sync_debugfs(g); + if (g->ops.ltc.sync_debugfs) + g->ops.ltc.sync_debugfs(g); #endif gk20a_dbg_info("channel %d", c->hw_chid); @@ -2081,7 +2080,7 @@ static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, gk20a_dbg_fn(""); - return g->ops.gr->bind_ctxsw_zcull(g, gr, ch, + return g->ops.gr.bind_ctxsw_zcull(g, gr, ch, args->gpu_va, args->mode); } @@ -2301,7 +2300,7 @@ long gk20a_channel_ioctl(struct file *filp, __func__, cmd); return err; } - err = ch->g->ops.gr->alloc_obj_ctx(ch, + err = 
ch->g->ops.gr.alloc_obj_ctx(ch, (struct nvgpu_alloc_obj_ctx_args *)buf); gk20a_idle(dev); break; @@ -2313,7 +2312,7 @@ long gk20a_channel_ioctl(struct file *filp, __func__, cmd); return err; } - err = ch->g->ops.gr->free_obj_ctx(ch, + err = ch->g->ops.gr.free_obj_ctx(ch, (struct nvgpu_free_obj_ctx_args *)buf); gk20a_idle(dev); break; diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index aae77647..93831844 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c @@ -21,7 +21,6 @@ #include #include -#include "gr_ops.h" #include "gk20a.h" #include "fence_gk20a.h" @@ -275,7 +274,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg if (zcull_info == NULL) return -ENOMEM; - err = g->ops.gr->get_zcull_info(g, &g->gr, zcull_info); + err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info); if (err) { kfree(zcull_info); break; diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 347765aa..ed730174 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -1265,8 +1265,8 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, if (verbose) gk20a_debug_dump(g->dev); - if (g->ops.ltc->flush) - g->ops.ltc->flush(g); + if (g->ops.ltc.flush) + g->ops.ltc.flush(g); /* store faulted engines in advance */ g->fifo.mmu_fault_engines = 0; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 5dda7d74..cea53e00 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -612,7 +612,7 @@ static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id) if (mc_intr_0 & mc_intr_0_priv_ring_pending_f()) gk20a_priv_ring_isr(g); if (mc_intr_0 & mc_intr_0_ltc_pending_f()) - g->ops.ltc->isr(g); + g->ops.ltc.isr(g); if (mc_intr_0 & mc_intr_0_pbus_pending_f()) gk20a_pbus_isr(g); @@ -881,8 +881,8 @@ static int gk20a_pm_finalize_poweron(struct device 
*dev) goto done; } - if (g->ops.ltc->init_fs_state) - g->ops.ltc->init_fs_state(g); + if (g->ops.ltc.init_fs_state) + g->ops.ltc.init_fs_state(g); err = gk20a_init_mm_support(g); if (err) { @@ -1818,7 +1818,7 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) { struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics; - gpu->L2_cache_size = g->ops.ltc->determine_L2_size_bytes(g); + gpu->L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g); gpu->on_board_video_memory_size = 0; /* integrated GPU */ gpu->num_gpc = g->gr.gpc_count; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 2c3fb400..5669e1c5 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -33,7 +33,7 @@ struct acr_gm20b; #include #include "../../../arch/arm/mach-tegra/iomap.h" -#include "nvgpu_gpuid.h" + #include "as_gk20a.h" #include "clk_gk20a.h" #include "fifo_gk20a.h" @@ -60,33 +60,81 @@ enum gk20a_cbc_op { gk20a_cbc_op_invalidate, }; -struct gpu_ltc_ops { - int (*determine_L2_size_bytes)(struct gk20a *gk20a); - void (*set_max_ways_evict_last)(struct gk20a *g, u32 max_ways); - int (*init_comptags)(struct gk20a *g, struct gr_gk20a *gr); - int (*cbc_ctrl)(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max); - void (*set_zbc_color_entry)(struct gk20a *g, - struct zbc_entry *color_val, - u32 index); - void (*set_zbc_depth_entry)(struct gk20a *g, - struct zbc_entry *depth_val, - u32 index); - void (*init_cbc)(struct gk20a *g, struct gr_gk20a *gr); - void (*sync_debugfs)(struct gk20a *g); - void (*init_fs_state)(struct gk20a *g); - void (*elpg_flush)(struct gk20a *g); - void (*isr)(struct gk20a *g); - u32 (*cbc_fix_config)(struct gk20a *g, int base); - void (*flush)(struct gk20a *g); -}; - -struct gpu_ltc_ops; -struct gpu_gr_ops; - struct gpu_ops { - const struct gpu_ltc_ops *ltc; - const struct gpu_gr_ops *gr; + struct { + int (*determine_L2_size_bytes)(struct gk20a *gk20a); + void (*set_max_ways_evict_last)(struct gk20a 
*g, u32 max_ways); + int (*init_comptags)(struct gk20a *g, struct gr_gk20a *gr); + int (*cbc_ctrl)(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max); + void (*set_zbc_color_entry)(struct gk20a *g, + struct zbc_entry *color_val, + u32 index); + void (*set_zbc_depth_entry)(struct gk20a *g, + struct zbc_entry *depth_val, + u32 index); + void (*init_cbc)(struct gk20a *g, struct gr_gk20a *gr); + void (*sync_debugfs)(struct gk20a *g); + void (*init_fs_state)(struct gk20a *g); + void (*elpg_flush)(struct gk20a *g); + void (*isr)(struct gk20a *g); + u32 (*cbc_fix_config)(struct gk20a *g, int base); + void (*flush)(struct gk20a *g); + } ltc; + struct { + int (*init_fs_state)(struct gk20a *g); + void (*access_smpc_reg)(struct gk20a *g, u32 quad, u32 offset); + void (*bundle_cb_defaults)(struct gk20a *g); + void (*cb_size_default)(struct gk20a *g); + int (*calc_global_ctx_buffer_size)(struct gk20a *g); + void (*commit_global_attrib_cb)(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, bool patch); + void (*commit_global_bundle_cb)(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, u64 size, bool patch); + int (*commit_global_cb_manager)(struct gk20a *g, + struct channel_gk20a *ch, + bool patch); + void (*commit_global_pagepool)(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, u32 size, bool patch); + void (*init_gpc_mmu)(struct gk20a *g); + int (*handle_sw_method)(struct gk20a *g, u32 addr, + u32 class_num, u32 offset, u32 data); + void (*set_alpha_circular_buffer_size)(struct gk20a *g, + u32 data); + void (*set_circular_buffer_size)(struct gk20a *g, u32 data); + void (*enable_hww_exceptions)(struct gk20a *g); + bool (*is_valid_class)(struct gk20a *g, u32 class_num); + void (*get_sm_dsm_perf_regs)(struct gk20a *g, + u32 *num_sm_dsm_perf_regs, + u32 **sm_dsm_perf_regs, + u32 *perf_register_stride); + void (*get_sm_dsm_perf_ctrl_regs)(struct gk20a *g, + u32 *num_sm_dsm_perf_regs, + u32 **sm_dsm_perf_regs, + u32 
*perf_register_stride); + void (*set_hww_esr_report_mask)(struct gk20a *g); + int (*setup_alpha_beta_tables)(struct gk20a *g, + struct gr_gk20a *gr); + int (*falcon_load_ucode)(struct gk20a *g, + u64 addr_base, + struct gk20a_ctxsw_ucode_segments *segments, + u32 reg_offset); + int (*load_ctxsw_ucode)(struct gk20a *g); + u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); + void (*free_channel_ctx)(struct channel_gk20a *c); + int (*alloc_obj_ctx)(struct channel_gk20a *c, + struct nvgpu_alloc_obj_ctx_args *args); + int (*free_obj_ctx)(struct channel_gk20a *c, + struct nvgpu_free_obj_ctx_args *args); + int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr, + struct channel_gk20a *c, u64 zcull_va, + u32 mode); + int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr, + struct gr_zcull_info *zcull_params); + } gr; const char *name; struct { void (*init_fs_state)(struct gk20a *g); @@ -672,6 +720,18 @@ int __gk20a_do_unidle(struct platform_device *pdev); const struct firmware * gk20a_request_firmware(struct gk20a *g, const char *fw_name); +#define NVGPU_GPU_ARCHITECTURE_SHIFT 4 + +/* constructs unique and compact GPUID from nvgpu_gpu_characteristics + * arch/impl fields */ +#define GK20A_GPUID(arch, impl) ((u32) ((arch) | (impl))) + +#define GK20A_GPUID_GK20A \ + GK20A_GPUID(NVGPU_GPU_ARCH_GK100, NVGPU_GPU_IMPL_GK20A) + +#define GK20A_GPUID_GM20B \ + GK20A_GPUID(NVGPU_GPU_ARCH_GM200, NVGPU_GPU_IMPL_GM20B) + int gk20a_init_gpu_characteristics(struct gk20a *g); int gk20a_user_init(struct platform_device *dev); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c index 2abb0e9d..52a34086 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c @@ -26,7 +26,6 @@ #include -#include "gr_ops.h" #include "gk20a.h" #include "gr_gk20a.h" #include "fifo_gk20a.h" @@ -624,9 +623,9 @@ static ssize_t tpc_fs_mask_read(struct device *device, u32 tpc_fs_mask = 0; for (gpc_index = 0; gpc_index < 
gr->gpc_count; gpc_index++) { - if (g->ops.gr->get_gpc_tpc_mask) + if (g->ops.gr.get_gpc_tpc_mask) tpc_fs_mask |= - g->ops.gr->get_gpc_tpc_mask(g, gpc_index) << + g->ops.gr.get_gpc_tpc_mask(g, gpc_index) << (gr->max_tpc_per_gpc_count * gpc_index); } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index dddec803..524547e7 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -27,7 +27,6 @@ #include #include -#include "gr_ops.h" #include "gk20a.h" #include "kind_gk20a.h" #include "gr_ctx_gk20a.h" @@ -66,6 +65,11 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, struct channel_gk20a *c); static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c); +/* channel gr ctx buffer */ +static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, + struct channel_gk20a *c); +static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c); + /* channel patch ctx buffer */ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, struct channel_gk20a *c); @@ -747,7 +751,7 @@ clean_up: return ret; } -int gr_gk20a_commit_global_cb_manager(struct gk20a *g, +static int gr_gk20a_commit_global_cb_manager(struct gk20a *g, struct channel_gk20a *c, bool patch) { struct gr_gk20a *gr = &g->gr; @@ -852,7 +856,7 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d", addr, size); - g->ops.gr->commit_global_pagepool(g, ch_ctx, addr, size, patch); + g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, patch); /* global bundle cb */ addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >> @@ -865,7 +869,7 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d", addr, size); - g->ops.gr->commit_global_bundle_cb(g, ch_ctx, addr, size, patch); + g->ops.gr.commit_global_bundle_cb(g, ch_ctx, addr, size, patch); /* global attrib cb */ addr = 
(u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >> @@ -874,7 +878,7 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); gk20a_dbg_info("attrib cb addr : 0x%016llx", addr); - g->ops.gr->commit_global_attrib_cb(g, ch_ctx, addr, patch); + g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, patch); if (patch) gr_gk20a_ctx_patch_write_end(g, ch_ctx); @@ -882,7 +886,7 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, return 0; } -void gr_gk20a_commit_global_attrib_cb(struct gk20a *g, +static void gr_gk20a_commit_global_attrib_cb(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, u64 addr, bool patch) { @@ -895,7 +899,7 @@ void gr_gk20a_commit_global_attrib_cb(struct gk20a *g, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch); } -void gr_gk20a_commit_global_bundle_cb(struct gk20a *g, +static void gr_gk20a_commit_global_bundle_cb(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, u64 addr, u64 size, bool patch) { @@ -1149,7 +1153,7 @@ static inline u32 clear_count_bits(u32 num, u32 clear_count) return num; } -int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g, +static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g, struct gr_gk20a *gr) { u32 table_index_bits = 5; @@ -1253,7 +1257,7 @@ static u32 gr_gk20a_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) return 0x1; } -int gr_gk20a_init_fs_state(struct gk20a *g) +static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; u32 tpc_index, gpc_index; @@ -1315,14 +1319,14 @@ int gr_gk20a_init_fs_state(struct gk20a *g) /* gr__setup_pd_mapping stubbed for gk20a */ gr_gk20a_setup_rop_mapping(g, gr); - if (g->ops.gr->setup_alpha_beta_tables) - g->ops.gr->setup_alpha_beta_tables(g, gr); + if (g->ops.gr.setup_alpha_beta_tables) + g->ops.gr.setup_alpha_beta_tables(g, gr); if (gr->num_fbps == 1) max_ways_evict = 9; if (max_ways_evict != INVALID_MAX_WAYS) - 
g->ops.ltc->set_max_ways_evict_last(g, max_ways_evict); + g->ops.ltc.set_max_ways_evict_last(g, max_ways_evict); for (gpc_index = 0; gpc_index < gr_pd_dist_skip_table__size_1_v() * 4; @@ -2093,7 +2097,7 @@ void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base, gr_fecs_bootvec_vec_f(segments->boot_entry)); } -int gr_gk20a_falcon_load_ucode(struct gk20a *g, u64 addr_base, +int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) { gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), @@ -2119,10 +2123,10 @@ static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) gr_gk20a_load_falcon_bind_instblk(g); - g->ops.gr->falcon_load_ucode(g, addr_base, + g->ops.gr.falcon_load_ucode(g, addr_base, &g->ctxsw_ucode_info.fecs, 0); - g->ops.gr->falcon_load_ucode(g, addr_base, + g->ops.gr.falcon_load_ucode(g, addr_base, &g->ctxsw_ucode_info.gpccs, gr_gpcs_gpccs_falcon_hwcfg_r() - gr_fecs_falcon_hwcfg_r()); @@ -2293,7 +2297,7 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g) gk20a_dbg_fn(""); - attr_buffer_size = g->ops.gr->calc_global_ctx_buffer_size(g); + attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g); gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size); @@ -2628,7 +2632,7 @@ void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg) __gr_gk20a_free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx); } -void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c) +static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c) { __gr_gk20a_free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx); } @@ -2715,7 +2719,7 @@ static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c) } } -void gr_gk20a_free_channel_ctx(struct channel_gk20a *c) +void gk20a_free_channel_ctx(struct channel_gk20a *c) { gr_gk20a_unmap_global_ctx_buffers(c); gr_gk20a_free_channel_patch_ctx(c); @@ -2730,7 +2734,7 @@ void gr_gk20a_free_channel_ctx(struct channel_gk20a *c) c->first_init = false; } -bool 
gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num) +static bool gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num) { bool valid = false; @@ -2749,7 +2753,7 @@ bool gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num) return valid; } -int gr_gk20a_alloc_obj_ctx(struct channel_gk20a *c, +int gk20a_alloc_obj_ctx(struct channel_gk20a *c, struct nvgpu_alloc_obj_ctx_args *args) { struct gk20a *g = c->g; @@ -2768,7 +2772,7 @@ int gr_gk20a_alloc_obj_ctx(struct channel_gk20a *c, return -EINVAL; } - if (!g->ops.gr->is_valid_class(g, args->class_num)) { + if (!g->ops.gr.is_valid_class(g, args->class_num)) { gk20a_err(dev_from_gk20a(g), "invalid obj class 0x%x", args->class_num); err = -EINVAL; @@ -2912,7 +2916,7 @@ out: return err; } -int gr_gk20a_free_obj_ctx(struct channel_gk20a *c, +int gk20a_free_obj_ctx(struct channel_gk20a *c, struct nvgpu_free_obj_ctx_args *args) { unsigned long timeout = gk20a_get_gr_idle_timeout(c->g); @@ -3004,7 +3008,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) gk20a_allocator_destroy(&gr->comp_tags); } -void gr_gk20a_bundle_cb_defaults(struct gk20a *g) +static void gr_gk20a_bundle_cb_defaults(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -3085,9 +3089,9 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) gr->gpc_ppc_count[gpc_index] = gr->pe_count_per_gpc; gr->ppc_count += gr->gpc_ppc_count[gpc_index]; - if (g->ops.gr->get_gpc_tpc_mask) + if (g->ops.gr.get_gpc_tpc_mask) gr->gpc_tpc_mask[gpc_index] = - g->ops.gr->get_gpc_tpc_mask(g, gpc_index); + g->ops.gr.get_gpc_tpc_mask(g, gpc_index); for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) { @@ -3170,9 +3174,9 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) pes_index, gpc_index, gr->pes_tpc_mask[pes_index][gpc_index]); - g->ops.gr->bundle_cb_defaults(g); - g->ops.gr->cb_size_default(g); - g->ops.gr->calc_global_ctx_buffer_size(g); + g->ops.gr.bundle_cb_defaults(g); + g->ops.gr.cb_size_default(g); + 
g->ops.gr.calc_global_ctx_buffer_size(g); gr->timeslice_mode = gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v(); gk20a_dbg_info("bundle_cb_default_size: %d", @@ -3502,7 +3506,7 @@ static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, } /* update l2 table */ - g->ops.ltc->set_zbc_color_entry(g, color_val, index); + g->ops.ltc.set_zbc_color_entry(g, color_val, index); /* update ds table */ gk20a_writel(g, gr_ds_zbc_color_r_r(), @@ -3568,7 +3572,7 @@ static int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, } /* update l2 table */ - g->ops.ltc->set_zbc_depth_entry(g, depth_val, index); + g->ops.ltc.set_zbc_depth_entry(g, depth_val, index); /* update ds table */ gk20a_writel(g, gr_ds_zbc_z_r(), @@ -4168,7 +4172,7 @@ void gr_gk20a_enable_hww_exceptions(struct gk20a *g) gr_ds_hww_report_mask_sph23_err_report_f()); } -void gr_gk20a_set_hww_esr_report_mask(struct gk20a *g) +static void gr_gk20a_set_hww_esr_report_mask(struct gk20a *g) { /* setup sm warp esr report masks */ gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), @@ -4236,8 +4240,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) fb_mmu_debug_rd_vol_false_f() | fb_mmu_debug_rd_addr_f(addr)); - if (g->ops.gr->init_gpc_mmu) - g->ops.gr->init_gpc_mmu(g); + if (g->ops.gr.init_gpc_mmu) + g->ops.gr.init_gpc_mmu(g); /* load gr floorsweeping registers */ data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r()); @@ -4280,8 +4284,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) gr_fecs_host_int_enable_umimp_illegal_method_enable_f() | gr_fecs_host_int_enable_watchdog_enable_f()); - g->ops.gr->enable_hww_exceptions(g); - g->ops.gr->set_hww_esr_report_mask(g); + g->ops.gr.enable_hww_exceptions(g); + g->ops.gr.set_hww_esr_report_mask(g); /* enable TPC exceptions per GPC */ gk20a_gr_enable_gpc_exceptions(g); @@ -4306,7 +4310,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) else gr_gk20a_load_zbc_default_table(g, gr); - g->ops.ltc->init_cbc(g, gr); + 
g->ops.ltc.init_cbc(g, gr); /* load ctx init */ for (i = 0; i < sw_ctx_load->count; i++) @@ -4325,11 +4329,11 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) gr_fe_go_idle_timeout_count_disabled_f()); /* override a few ctx state registers */ - g->ops.gr->commit_global_cb_manager(g, NULL, false); + g->ops.gr.commit_global_cb_manager(g, NULL, false); gr_gk20a_commit_global_timeslice(g, NULL, false); /* floorsweep anything left */ - g->ops.gr->init_fs_state(g); + g->ops.gr.init_fs_state(g); err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); if (err) @@ -4450,7 +4454,7 @@ int gr_gk20a_init_ctxsw(struct gk20a *g) struct gr_gk20a *gr = &g->gr; u32 err = 0; - err = g->ops.gr->load_ctxsw_ucode(g); + err = g->ops.gr.load_ctxsw_ucode(g); if (err) goto out; @@ -4592,7 +4596,7 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) gk20a_dbg_info("total ram pages : %lu", totalram_pages); gr->max_comptag_mem = totalram_pages >> (10 - (PAGE_SHIFT - 10)); - err = g->ops.ltc->init_comptags(g, gr); + err = g->ops.ltc.init_comptags(g, gr); if (err) goto clean_up; @@ -4809,7 +4813,7 @@ void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data) } } -void gr_gk20a_set_circular_buffer_size(struct gk20a *g, u32 data) +static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data) { struct gr_gk20a *gr = &g->gr; u32 gpc_index, ppc_index, stride, val, offset; @@ -4860,7 +4864,7 @@ void gr_gk20a_set_circular_buffer_size(struct gk20a *g, u32 data) } } -void gr_gk20a_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) +static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) { struct gr_gk20a *gr = &g->gr; u32 gpc_index, ppc_index, stride, val; @@ -4969,7 +4973,7 @@ int gk20a_gr_reset(struct gk20a *g) return 0; } -int gr_gk20a_handle_sw_method(struct gk20a *g, u32 addr, +static int gr_gk20a_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data) { gk20a_dbg_fn(""); @@ -4990,10 +4994,10 @@ int 
gr_gk20a_handle_sw_method(struct gk20a *g, u32 addr, gk20a_gr_set_shader_exceptions(g, data); break; case NVA297_SET_CIRCULAR_BUFFER_SIZE: - g->ops.gr->set_circular_buffer_size(g, data); + g->ops.gr.set_circular_buffer_size(g, data); break; case NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE: - g->ops.gr->set_alpha_circular_buffer_size(g, data); + g->ops.gr.set_alpha_circular_buffer_size(g, data); break; default: goto fail; @@ -5035,7 +5039,7 @@ static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g, static int gk20a_gr_handle_illegal_method(struct gk20a *g, struct gr_isr_data *isr_data) { - int ret = g->ops.gr->handle_sw_method(g, isr_data->addr, + int ret = g->ops.gr.handle_sw_method(g, isr_data->addr, isr_data->class_num, isr_data->offset, isr_data->data_lo); if (ret) @@ -6233,7 +6237,7 @@ int gr_gk20a_ctx_patch_smpc(struct gk20a *g, return 0; } -void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) +static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) { u32 reg; u32 quad_ctrl; @@ -6405,7 +6409,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, * by computing it from the base gpc/tpc strides. Then make sure * it is a real match. */ - g->ops.gr->get_sm_dsm_perf_regs(g, &num_sm_dsm_perf_regs, + g->ops.gr.get_sm_dsm_perf_regs(g, &num_sm_dsm_perf_regs, &sm_dsm_perf_regs, &perf_register_stride); @@ -6436,7 +6440,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, /* Didn't find reg in supported group 1. 
* so try the second group now */ - g->ops.gr->get_sm_dsm_perf_ctrl_regs(g, &num_sm_dsm_perf_ctrl_regs, + g->ops.gr.get_sm_dsm_perf_ctrl_regs(g, &num_sm_dsm_perf_ctrl_regs, &sm_dsm_perf_ctrl_regs, &control_register_stride); @@ -6893,8 +6897,8 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, /* if this is a quad access, setup for special access*/ if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) - && g->ops.gr->access_smpc_reg) - g->ops.gr->access_smpc_reg(g, + && g->ops.gr.access_smpc_reg) + g->ops.gr.access_smpc_reg(g, ctx_ops[i].quad, ctx_ops[i].offset); offset = ctx_ops[i].offset; @@ -6997,8 +7001,8 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, /* if this is a quad access, setup for special access*/ if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) && - g->ops.gr->access_smpc_reg) - g->ops.gr->access_smpc_reg(g, ctx_ops[i].quad, + g->ops.gr.access_smpc_reg) + g->ops.gr.access_smpc_reg(g, ctx_ops[i].quad, ctx_ops[i].offset); for (j = 0; j < num_offsets; j++) { @@ -7079,7 +7083,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, return err; } -void gr_gk20a_cb_size_default(struct gk20a *g) +static void gr_gk20a_cb_size_default(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -7089,7 +7093,7 @@ void gr_gk20a_cb_size_default(struct gk20a *g) gr_gpc0_ppc0_cbm_cfg2_size_default_v(); } -int gr_gk20a_calc_global_ctx_buffer_size(struct gk20a *g) +static int gr_gk20a_calc_global_ctx_buffer_size(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; int size; @@ -7136,37 +7140,35 @@ void gk20a_init_gr(struct gk20a *g) init_waitqueue_head(&g->gr.init_wq); } -#include "gr_ops_gk20a.h" -static const struct gpu_gr_ops gk20a_gr_ops = { - __set_gr_gk20a_op(access_smpc_reg), - __set_gr_gk20a_op(bundle_cb_defaults), - __set_gr_gk20a_op(cb_size_default), - __set_gr_gk20a_op(calc_global_ctx_buffer_size), - __set_gr_gk20a_op(commit_global_attrib_cb), - __set_gr_gk20a_op(commit_global_bundle_cb), - __set_gr_gk20a_op(commit_global_cb_manager), - 
__set_gr_gk20a_op(commit_global_pagepool), - __set_gr_gk20a_op(handle_sw_method), - __set_gr_gk20a_op(set_alpha_circular_buffer_size), - __set_gr_gk20a_op(set_circular_buffer_size), - __set_gr_gk20a_op(enable_hww_exceptions), - __set_gr_gk20a_op(is_valid_class), - __set_gr_gk20a_op(get_sm_dsm_perf_regs), - __set_gr_gk20a_op(get_sm_dsm_perf_ctrl_regs), - __set_gr_gk20a_op(init_fs_state), - __set_gr_gk20a_op(set_hww_esr_report_mask), - __set_gr_gk20a_op(setup_alpha_beta_tables), - __set_gr_gk20a_op(falcon_load_ucode), - __set_gr_gk20a_op(load_ctxsw_ucode), - __set_gr_gk20a_op(get_gpc_tpc_mask), - __set_gr_gk20a_op(free_channel_ctx), - __set_gr_gk20a_op(alloc_obj_ctx), - __set_gr_gk20a_op(free_obj_ctx), - __set_gr_gk20a_op(bind_ctxsw_zcull), - __set_gr_gk20a_op(get_zcull_info) -}; - void gk20a_init_gr_ops(struct gpu_ops *gops) { - gops->gr = &gk20a_gr_ops; + gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; + gops->gr.bundle_cb_defaults = gr_gk20a_bundle_cb_defaults; + gops->gr.cb_size_default = gr_gk20a_cb_size_default; + gops->gr.calc_global_ctx_buffer_size = + gr_gk20a_calc_global_ctx_buffer_size; + gops->gr.commit_global_attrib_cb = gr_gk20a_commit_global_attrib_cb; + gops->gr.commit_global_bundle_cb = gr_gk20a_commit_global_bundle_cb; + gops->gr.commit_global_cb_manager = gr_gk20a_commit_global_cb_manager; + gops->gr.commit_global_pagepool = gr_gk20a_commit_global_pagepool; + gops->gr.handle_sw_method = gr_gk20a_handle_sw_method; + gops->gr.set_alpha_circular_buffer_size = + gk20a_gr_set_alpha_circular_buffer_size; + gops->gr.set_circular_buffer_size = + gk20a_gr_set_circular_buffer_size; + gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; + gops->gr.is_valid_class = gr_gk20a_is_valid_class; + gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; + gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; + gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep; + gops->gr.set_hww_esr_report_mask = 
gr_gk20a_set_hww_esr_report_mask; + gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables; + gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments; + gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; + gops->gr.get_gpc_tpc_mask = gr_gk20a_get_gpc_tpc_mask; + gops->gr.free_channel_ctx = gk20a_free_channel_ctx; + gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx; + gops->gr.free_obj_ctx = gk20a_free_obj_ctx; + gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull; + gops->gr.get_zcull_info = gr_gk20a_get_zcull_info; } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 508edc79..7db6bccf 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -350,11 +350,11 @@ int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr); struct nvgpu_alloc_obj_ctx_args; struct nvgpu_free_obj_ctx_args; -int gr_gk20a_alloc_obj_ctx(struct channel_gk20a *c, +int gk20a_alloc_obj_ctx(struct channel_gk20a *c, struct nvgpu_alloc_obj_ctx_args *args); -int gr_gk20a_free_obj_ctx(struct channel_gk20a *c, +int gk20a_free_obj_ctx(struct channel_gk20a *c, struct nvgpu_free_obj_ctx_args *args); -void gr_gk20a_free_channel_ctx(struct channel_gk20a *c); +void gk20a_free_channel_ctx(struct channel_gk20a *c); int gk20a_gr_isr(struct gk20a *g); int gk20a_gr_nonstall_isr(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/gr_ops_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_ops_gk20a.h deleted file mode 100644 index df0cf020..00000000 --- a/drivers/gpu/nvgpu/gk20a/gr_ops_gk20a.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * GPK20A GPU graphics ops - * - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#ifndef _GR_OPS_GK20A_H_ -#define _GR_OPS_GK20A_H_ - -#include "gr_ops.h" - -#define __gr_gk20a_op(X) gr_gk20a_ ## X -#define __set_gr_gk20a_op(X) . X = gr_gk20a_ ## X - -int __gr_gk20a_op(init_fs_state)(struct gk20a *); -void __gr_gk20a_op(access_smpc_reg)(struct gk20a *, u32, u32); -void __gr_gk20a_op(bundle_cb_defaults)(struct gk20a *); -void __gr_gk20a_op(cb_size_default)(struct gk20a *); -int __gr_gk20a_op(calc_global_ctx_buffer_size)(struct gk20a *); -void __gr_gk20a_op(commit_global_attrib_cb)(struct gk20a *, - struct channel_ctx_gk20a *, u64 , bool); -void __gr_gk20a_op(commit_global_bundle_cb)(struct gk20a *, - struct channel_ctx_gk20a *, u64, u64, bool); -int __gr_gk20a_op(commit_global_cb_manager)(struct gk20a *, - struct channel_gk20a *, bool); -void __gr_gk20a_op(commit_global_pagepool)(struct gk20a *, - struct channel_ctx_gk20a *, u64 , u32, bool); -void __gr_gk20a_op(init_gpc_mmu)(struct gk20a *); -int __gr_gk20a_op(handle_sw_method)(struct gk20a *, u32 , u32, u32, u32); -void __gr_gk20a_op(set_alpha_circular_buffer_size)(struct gk20a *, u32); -void __gr_gk20a_op(set_circular_buffer_size)(struct gk20a *, u32); -void __gr_gk20a_op(enable_hww_exceptions)(struct gk20a *); -bool __gr_gk20a_op(is_valid_class)(struct gk20a *, u32); -void __gr_gk20a_op(get_sm_dsm_perf_regs)(struct gk20a *, u32 *, u32 **, u32 *); -void __gr_gk20a_op(get_sm_dsm_perf_ctrl_regs)(struct gk20a *, - u32 *, u32 **, u32 *); -void __gr_gk20a_op(set_hww_esr_report_mask)(struct gk20a *); -int __gr_gk20a_op(setup_alpha_beta_tables)(struct gk20a *, struct gr_gk20a *); -int __gr_gk20a_op(falcon_load_ucode)(struct gk20a *, u64, - struct gk20a_ctxsw_ucode_segments *, u32); -int __gr_gk20a_op(load_ctxsw_ucode)(struct gk20a *); -u32 
__gr_gk20a_op(get_gpc_tpc_mask)(struct gk20a *, u32); -void __gr_gk20a_op(free_channel_ctx)(struct channel_gk20a *); -int __gr_gk20a_op(alloc_obj_ctx)(struct channel_gk20a *c, - struct nvgpu_alloc_obj_ctx_args *); -int __gr_gk20a_op(free_obj_ctx)(struct channel_gk20a *c, - struct nvgpu_free_obj_ctx_args *); -int __gr_gk20a_op(bind_ctxsw_zcull)(struct gk20a *, - struct gr_gk20a *, struct channel_gk20a *, u64, u32); -int __gr_gk20a_op(get_zcull_info)(struct gk20a *, - struct gr_gk20a *, struct gr_zcull_info *); - -#endif diff --git a/drivers/gpu/nvgpu/gk20a/hal.c b/drivers/gpu/nvgpu/gk20a/hal.c index 84b8d819..8d1a29dd 100644 --- a/drivers/gpu/nvgpu/gk20a/hal.c +++ b/drivers/gpu/nvgpu/gk20a/hal.c @@ -20,13 +20,13 @@ int gpu_init_hal(struct gk20a *g) { u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl; - gk20a_dbg_fn("ver=0x%x", ver); switch (ver) { - case NVGPU_GPUID_GK20A: - if (gk20a_init_hal(&g->ops)) - return -ENODEV; + case GK20A_GPUID_GK20A: + gk20a_dbg_info("gk20a detected"); + gk20a_init_hal(&g->ops); break; - case NVGPU_GPUID_GM20B: + case GK20A_GPUID_GM20B: + gk20a_dbg_info("gm20b detected"); if (gm20b_init_hal(&g->ops)) return -ENODEV; break; diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c index badf640e..e0ab3f9b 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_common.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c @@ -189,9 +189,9 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) compbit_base_post_divide++; /* Bug 1477079 indicates sw adjustment on the posted divided base. 
*/ - if (g->ops.ltc->cbc_fix_config) + if (g->ops.ltc.cbc_fix_config) compbit_base_post_divide = - g->ops.ltc->cbc_fix_config(g, compbit_base_post_divide); + g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide); gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), compbit_base_post_divide); @@ -204,7 +204,7 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) gr->compbit_store.base_hw = compbit_base_post_divide; - g->ops.ltc->cbc_ctrl(g, gk20a_cbc_op_invalidate, + g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, 0, max_comptag_lines - 1); } diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c index 92dac449..aa094dc7 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c @@ -265,24 +265,19 @@ static int gk20a_determine_L2_size_bytes(struct gk20a *g) return cache_size; } -static const struct gpu_ltc_ops gk20a_ltc_ops = { - .determine_L2_size_bytes = gk20a_determine_L2_size_bytes, - .set_max_ways_evict_last = gk20a_ltc_set_max_ways_evict_last, - .init_comptags = gk20a_ltc_init_comptags, - .cbc_ctrl = gk20a_ltc_cbc_ctrl, - .set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry, - .set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry, - .init_cbc = gk20a_ltc_init_cbc, -#ifdef CONFIG_DEBUG_FS - .sync_debugfs = gk20a_ltc_sync_debugfs, -#endif - .elpg_flush = gk20a_mm_g_elpg_flush_locked, - .init_fs_state = gk20a_ltc_init_fs_state, - .isr = gk20a_ltc_isr - -}; - void gk20a_init_ltc(struct gpu_ops *gops) { - gops->ltc = &gk20a_ltc_ops; + gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes; + gops->ltc.set_max_ways_evict_last = gk20a_ltc_set_max_ways_evict_last; + gops->ltc.init_comptags = gk20a_ltc_init_comptags; + gops->ltc.cbc_ctrl = gk20a_ltc_cbc_ctrl; + gops->ltc.set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry; + gops->ltc.set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry; + gops->ltc.init_cbc = gk20a_ltc_init_cbc; +#ifdef CONFIG_DEBUG_FS + gops->ltc.sync_debugfs = 
gk20a_ltc_sync_debugfs; +#endif + gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked; + gops->ltc.init_fs_state = gk20a_ltc_init_fs_state; + gops->ltc.isr = gk20a_ltc_isr; } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index bd2ef845..c121d6bf 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -1100,7 +1100,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, COMP_TAG_LINE_SIZE_SHIFT; /* init/clear the ctag buffer */ - g->ops.ltc->cbc_ctrl(g, gk20a_cbc_op_clear, + g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, ctag_offset, ctag_offset + ctag_lines - 1); } @@ -3079,7 +3079,7 @@ int gk20a_mm_suspend(struct gk20a *g) { gk20a_dbg_fn(""); - g->ops.ltc->elpg_flush(g); + g->ops.ltc.elpg_flush(g); gk20a_dbg_fn("done"); return 0; diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 1bf4bea0..0580f19d 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -1818,7 +1818,7 @@ int gk20a_init_pmu_reset_enable_hw(struct gk20a *g) return 0; } -static int gk20a_prepare_pmu_ucode(struct gk20a *g) +static int gk20a_prepare_ucode(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; int i, err = 0; @@ -2259,7 +2259,7 @@ static void pmu_setup_hw_enable_elpg(struct gk20a *g) void gk20a_init_pmu_ops(struct gpu_ops *gops) { - gops->pmu.prepare_ucode = gk20a_prepare_pmu_ucode; + gops->pmu.prepare_ucode = gk20a_prepare_ucode; gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1; gops->pmu.pmu_setup_elpg = NULL; gops->pmu.init_wpr_region = NULL; diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 7b69c5c8..8a3de4e8 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -1,5 +1,5 @@ /* - * GM20B GPU GR + * GM20B GPC MMU * * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. 
* @@ -16,7 +16,6 @@ #include #include /* for mdelay */ -#include "gr_ops.h" #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" @@ -29,7 +28,7 @@ #include "pmu_gm20b.h" #include "acr_gm20b.h" -void gr_gm20b_init_gpc_mmu(struct gk20a *g) +static void gr_gm20b_init_gpc_mmu(struct gk20a *g) { u32 temp; @@ -65,7 +64,7 @@ void gr_gm20b_init_gpc_mmu(struct gk20a *g) gk20a_readl(g, fb_fbhub_num_active_ltcs_r())); } -void gr_gm20b_bundle_cb_defaults(struct gk20a *g) +static void gr_gm20b_bundle_cb_defaults(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -77,7 +76,7 @@ void gr_gm20b_bundle_cb_defaults(struct gk20a *g) gr_pd_ab_dist_cfg2_token_limit_init_v(); } -void gr_gm20b_cb_size_default(struct gk20a *g) +static void gr_gm20b_cb_size_default(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -87,7 +86,7 @@ void gr_gm20b_cb_size_default(struct gk20a *g) gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); } -int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) +static int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; int size; @@ -108,7 +107,7 @@ int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) return size; } -void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, +static void gr_gk20a_commit_global_attrib_cb(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, u64 addr, bool patch) { @@ -125,7 +124,7 @@ void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); } -void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, +static void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, u64 addr, u64 size, bool patch) { @@ -161,7 +160,7 @@ void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, } -int gr_gm20b_commit_global_cb_manager(struct gk20a *g, +static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, struct channel_gk20a *c, bool patch) { struct gr_gk20a *gr = &g->gr; @@ -248,7 +247,7 @@ int 
gr_gm20b_commit_global_cb_manager(struct gk20a *g, return 0; } -void gr_gm20b_commit_global_pagepool(struct gk20a *g, +static void gr_gm20b_commit_global_pagepool(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, u64 addr, u32 size, bool patch) { @@ -260,7 +259,7 @@ void gr_gm20b_commit_global_pagepool(struct gk20a *g, } -int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, +static int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data) { gk20a_dbg_fn(""); @@ -281,10 +280,10 @@ int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, gk20a_gr_set_shader_exceptions(g, data); break; case NVB197_SET_CIRCULAR_BUFFER_SIZE: - g->ops.gr->set_circular_buffer_size(g, data); + g->ops.gr.set_circular_buffer_size(g, data); break; case NVB197_SET_ALPHA_CIRCULAR_BUFFER_SIZE: - g->ops.gr->set_alpha_circular_buffer_size(g, data); + g->ops.gr.set_alpha_circular_buffer_size(g, data); break; default: goto fail; @@ -296,7 +295,7 @@ fail: return -EINVAL; } -void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) +static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) { struct gr_gk20a *gr = &g->gr; u32 gpc_index, ppc_index, stride, val; @@ -396,7 +395,7 @@ void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) } } -void gr_gm20b_enable_hww_exceptions(struct gk20a *g) +static void gr_gm20b_enable_hww_exceptions(struct gk20a *g) { gr_gk20a_enable_hww_exceptions(g); @@ -407,7 +406,7 @@ void gr_gm20b_enable_hww_exceptions(struct gk20a *g) gr_ds_hww_report_mask_2_sph24_err_report_f()); } -void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g) +static void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g) { /* setup sm warp esr report masks */ gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), @@ -440,7 +439,7 @@ void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g) gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f()); } -bool gr_gm20b_is_valid_class(struct gk20a 
*g, u32 class_num) +static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) { bool valid = false; @@ -460,7 +459,7 @@ bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) return valid; } -void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, +static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, u32 *num_sm_dsm_perf_regs, u32 **sm_dsm_perf_regs, u32 *perf_register_stride) @@ -471,7 +470,7 @@ void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, *perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); } -void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, +static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, u32 *num_sm_dsm_perf_regs, u32 **sm_dsm_perf_regs, u32 *ctrl_register_stride) @@ -482,7 +481,7 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); } -u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) +static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) { u32 val; struct gr_gk20a *gr = &g->gr; @@ -493,7 +492,7 @@ u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) return (~val) & ((0x1 << gr->max_tpc_per_gpc_count) - 1); } -int gr_gm20b_init_fs_state(struct gk20a *g) +static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; u32 tpc_index, gpc_index; @@ -596,7 +595,7 @@ int gr_gm20b_init_fs_state(struct gk20a *g) return 0; } -int gr_gm20b_falcon_load_ucode(struct gk20a *g, u64 addr_base, +static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) { gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), @@ -623,7 +622,7 @@ static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g) gr_gk20a_load_falcon_bind_instblk(g); - g->ops.gr->falcon_load_ucode(g, addr_base, + g->ops.gr.falcon_load_ucode(g, addr_base, &g->ctxsw_ucode_info.gpccs, 
gr_gpcs_gpccs_falcon_hwcfg_r() - gr_fecs_falcon_hwcfg_r()); @@ -649,7 +648,7 @@ static int gr_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout, u32 val) return -ETIMEDOUT; } -int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) +static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) { u32 err; gk20a_dbg_fn(""); @@ -711,30 +710,42 @@ int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) } #else -int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) +static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) { return -EPERM; } #endif -#include "gk20a/gr_ops_gk20a.h" -#include "gr_ops_gm20b.h" - -static struct gpu_gr_ops gm20b_gr_ops = { - __set_gr_gm20b_ops(), - __set_gr_gk20a_op(load_ctxsw_ucode) -}; - -static struct gpu_gr_ops gm20b_gr_privsecurity_ops = { - __set_gr_gm20b_ops(), - __set_gr_gm20b_op(load_ctxsw_ucode) -}; - void gm20b_init_gr(struct gpu_ops *gops) { + gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; + gops->gr.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults; + gops->gr.cb_size_default = gr_gm20b_cb_size_default; + gops->gr.calc_global_ctx_buffer_size = + gr_gm20b_calc_global_ctx_buffer_size; + gops->gr.commit_global_attrib_cb = gr_gk20a_commit_global_attrib_cb; + gops->gr.commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb; + gops->gr.commit_global_cb_manager = gr_gm20b_commit_global_cb_manager; + gops->gr.commit_global_pagepool = gr_gm20b_commit_global_pagepool; + gops->gr.handle_sw_method = gr_gm20b_handle_sw_method; + gops->gr.set_alpha_circular_buffer_size = gr_gm20b_set_alpha_circular_buffer_size; + gops->gr.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size; + gops->gr.enable_hww_exceptions = gr_gm20b_enable_hww_exceptions; + gops->gr.is_valid_class = gr_gm20b_is_valid_class; + gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs; + gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; + gops->gr.init_fs_state = gr_gm20b_ctx_state_floorsweep; + gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask; + 
gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments; if (gops->privsecurity) - gops->gr = &gm20b_gr_privsecurity_ops; + gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode; else - gops->gr = &gm20b_gr_ops; + gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; + gops->gr.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask; + gops->gr.free_channel_ctx = gk20a_free_channel_ctx; + gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx; + gops->gr.free_obj_ctx = gk20a_free_obj_ctx; + gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull; + gops->gr.get_zcull_info = gr_gk20a_get_zcull_info; } diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h index e822b33c..8348b9d9 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h @@ -1,5 +1,5 @@ /* - * GM20B GPU GR + * GM20B GPC MMU * * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. * @@ -13,8 +13,8 @@ * more details. */ -#ifndef _NVGPU_GR_GM20B_H_ -#define _NVGPU_GR_GM20B_H_ +#ifndef _NVHOST_GM20B_GR_MMU_H +#define _NVHOST_GM20B_GR_MMU_H struct gk20a; enum { @@ -29,7 +29,5 @@ enum { #define NVB1C0_SET_SHADER_EXCEPTIONS 0x1528 #define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0 - -struct gpu_ops; void gm20b_init_gr(struct gpu_ops *gops); #endif diff --git a/drivers/gpu/nvgpu/gm20b/gr_ops_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_ops_gm20b.h deleted file mode 100644 index 9477da75..00000000 --- a/drivers/gpu/nvgpu/gm20b/gr_ops_gm20b.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * GM20B GPU graphics ops - * - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#ifndef _GR_OPS_GM20B_H_ -#define _GR_OPS_GM20B_H_ - -#include "gr_ops.h" - -#define __gr_gm20b_op(X) gr_gm20b_ ## X -#define __set_gr_gm20b_op(X) . X = gr_gm20b_ ## X - -void __gr_gm20b_op(init_gpc_mmu)(struct gk20a *); -void __gr_gm20b_op(bundle_cb_defaults)(struct gk20a *); -void __gr_gm20b_op(cb_size_default)(struct gk20a *); -int __gr_gm20b_op(calc_global_ctx_buffer_size)(struct gk20a *); -void __gr_gm20b_op(commit_global_bundle_cb)(struct gk20a *, - struct channel_ctx_gk20a *, u64, u64, bool); -int __gr_gm20b_op(commit_global_cb_manager)(struct gk20a *, - struct channel_gk20a *, bool); -void __gr_gm20b_op(commit_global_pagepool)(struct gk20a *, - struct channel_ctx_gk20a *, u64 , u32, bool); -int __gr_gm20b_op(handle_sw_method)(struct gk20a *, u32 , u32, u32, u32); -void __gr_gm20b_op(set_alpha_circular_buffer_size)(struct gk20a *, u32); -void __gr_gm20b_op(set_circular_buffer_size)(struct gk20a *, u32); -void __gr_gm20b_op(enable_hww_exceptions)(struct gk20a *); -bool __gr_gm20b_op(is_valid_class)(struct gk20a *, u32); -void __gr_gm20b_op(get_sm_dsm_perf_regs)(struct gk20a *, u32 *, u32 **, u32 *); -void __gr_gm20b_op(get_sm_dsm_perf_ctrl_regs)(struct gk20a *, - u32 *, u32 **, u32 *); -int __gr_gm20b_op(init_fs_state)(struct gk20a *); -void __gr_gm20b_op(set_hww_esr_report_mask)(struct gk20a *); -int __gr_gm20b_op(falcon_load_ucode)(struct gk20a *, - u64, struct gk20a_ctxsw_ucode_segments *, u32); -u32 __gr_gm20b_op(get_gpc_tpc_mask)(struct gk20a *, u32); -int __gr_gm20b_op(load_ctxsw_ucode)(struct gk20a *); - -#define __set_gr_gm20b_ops() \ - /* newly defined for gm20b */ \ - __set_gr_gm20b_op(init_gpc_mmu), \ - __set_gr_gm20b_op(bundle_cb_defaults), \ - __set_gr_gm20b_op(cb_size_default), \ - 
__set_gr_gm20b_op(calc_global_ctx_buffer_size), \ - __set_gr_gm20b_op(commit_global_bundle_cb), \ - __set_gr_gm20b_op(commit_global_cb_manager), \ - __set_gr_gm20b_op(commit_global_pagepool), \ - __set_gr_gm20b_op(handle_sw_method), \ - __set_gr_gm20b_op(set_alpha_circular_buffer_size), \ - __set_gr_gm20b_op(set_circular_buffer_size), \ - __set_gr_gm20b_op(enable_hww_exceptions), \ - __set_gr_gm20b_op(is_valid_class), \ - __set_gr_gm20b_op(get_sm_dsm_perf_regs), \ - __set_gr_gm20b_op(get_sm_dsm_perf_ctrl_regs), \ - __set_gr_gm20b_op(init_fs_state), \ - __set_gr_gm20b_op(set_hww_esr_report_mask), \ - __set_gr_gm20b_op(falcon_load_ucode), \ - __set_gr_gm20b_op(get_gpc_tpc_mask), \ - \ - /* reused from gk20a */ \ - __set_gr_gk20a_op(access_smpc_reg), \ - __set_gr_gk20a_op(commit_global_attrib_cb), \ - __set_gr_gk20a_op(free_channel_ctx), \ - __set_gr_gk20a_op(alloc_obj_ctx), \ - __set_gr_gk20a_op(free_obj_ctx), \ - __set_gr_gk20a_op(bind_ctxsw_zcull), \ - __set_gr_gk20a_op(get_zcull_info) - -#endif diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 2a888e88..a089b59c 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c @@ -388,26 +388,24 @@ static int gm20b_determine_L2_size_bytes(struct gk20a *g) return cache_size; } -static struct gpu_ltc_ops gm20b_ltc_ops = { - .determine_L2_size_bytes = gm20b_determine_L2_size_bytes, - .set_max_ways_evict_last = gk20a_ltc_set_max_ways_evict_last, - .set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry, - .set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry, - .init_cbc = gk20a_ltc_init_cbc, -#ifdef CONFIG_DEBUG_FS - .sync_debugfs = gk20a_ltc_sync_debugfs, -#endif - /* GM20b specific ops. 
*/ - .init_fs_state = gm20b_ltc_init_fs_state, - .init_comptags = gm20b_ltc_init_comptags, - .cbc_ctrl = gm20b_ltc_cbc_ctrl, - .elpg_flush = gm20b_ltc_g_elpg_flush_locked, - .isr = gm20b_ltc_isr, - .cbc_fix_config = gm20b_ltc_cbc_fix_config, - .flush = gm20b_flush_ltc -}; - void gm20b_init_ltc(struct gpu_ops *gops) { - gops->ltc = &gm20b_ltc_ops; + /* Gk20a reused ops. */ + gops->ltc.determine_L2_size_bytes = gm20b_determine_L2_size_bytes; + gops->ltc.set_max_ways_evict_last = gk20a_ltc_set_max_ways_evict_last; + gops->ltc.set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry; + gops->ltc.set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry; + gops->ltc.init_cbc = gk20a_ltc_init_cbc; + + /* GM20b specific ops. */ + gops->ltc.init_fs_state = gm20b_ltc_init_fs_state; + gops->ltc.init_comptags = gm20b_ltc_init_comptags; + gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl; + gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked; + gops->ltc.isr = gm20b_ltc_isr; + gops->ltc.cbc_fix_config = gm20b_ltc_cbc_fix_config; + gops->ltc.flush = gm20b_flush_ltc; +#ifdef CONFIG_DEBUG_FS + gops->ltc.sync_debugfs = gk20a_ltc_sync_debugfs; +#endif } diff --git a/drivers/gpu/nvgpu/gr_ops.h b/drivers/gpu/nvgpu/gr_ops.h deleted file mode 100644 index bb2d47fa..00000000 --- a/drivers/gpu/nvgpu/gr_ops.h +++ /dev/null @@ -1,103 +0,0 @@ -#ifndef _NVGPU_GR_OPS_H_ -#define _NVGPU_GR_OPS_H_ - -#include -#include - -/* TBD: rename these. 
s/gk20a/nvgpu/g s/gpu/nvgpu/g*/ -struct gk20a; -struct channel_ctx_gk20a; -struct channel_gk20a; -struct gr_gk20a; -struct gk20a_ctxsw_ucode_segments; -struct nvgpu_alloc_obj_ctx_args; -struct nvgpu_free_obj_ctx_args; -struct gr_zcull_info; - -typedef int (*gr_init_fs_state_fn)(struct gk20a *g); -typedef void (*gr_access_smpc_reg_fn)(struct gk20a *g, u32 quad, u32 offset); -typedef void (*gr_bundle_cb_defaults_fn)(struct gk20a *g); -typedef void (*gr_cb_size_default_fn)(struct gk20a *g); -typedef int (*gr_calc_global_ctx_buffer_size_fn)(struct gk20a *g); -typedef void (*gr_commit_global_attrib_cb_fn)(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, - u64 addr, bool patch); -typedef void (*gr_commit_global_bundle_cb_fn)(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, - u64 addr, u64 size, bool patch); -typedef int (*gr_commit_global_cb_manager_fn)(struct gk20a *g, - struct channel_gk20a *ch, - bool patch); -typedef void (*gr_commit_global_pagepool_fn)(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, - u64 addr, u32 size, bool patch); -typedef void (*gr_init_gpc_mmu_fn)(struct gk20a *g); -typedef int (*gr_handle_sw_method_fn)(struct gk20a *g, u32 addr, - u32 class_num, u32 offset, u32 data); -typedef void (*gr_set_alpha_circular_buffer_size_fn)(struct gk20a *g, - u32 data); -typedef void (*gr_set_circular_buffer_size_fn)(struct gk20a *g, u32 data); -typedef void (*gr_enable_hww_exceptions_fn)(struct gk20a *g); -typedef bool (*gr_is_valid_class_fn)(struct gk20a *g, u32 class_num); -typedef void (*gr_get_sm_dsm_perf_regs_fn)(struct gk20a *g, - u32 *num_sm_dsm_perf_regs, - u32 **sm_dsm_perf_regs, - u32 *perf_register_stride); -typedef void (*gr_get_sm_dsm_perf_ctrl_regs_fn)(struct gk20a *g, - u32 *num_sm_dsm_perf_regs, - u32 **sm_dsm_perf_regs, - u32 *perf_register_stride); -typedef void (*gr_set_hww_esr_report_mask_fn)(struct gk20a *g); -typedef int (*gr_setup_alpha_beta_tables_fn)(struct gk20a *g, - struct gr_gk20a *gr); -typedef int 
(*gr_falcon_load_ucode_fn)(struct gk20a *g, - u64 addr_base, - struct gk20a_ctxsw_ucode_segments *segments, - u32 reg_offset); -typedef int (*gr_load_ctxsw_ucode_fn)(struct gk20a *g); -typedef u32 (*gr_get_gpc_tpc_mask_fn)(struct gk20a *g, u32 gpc_index); -typedef void (*gr_free_channel_ctx_fn)(struct channel_gk20a *c); -typedef int (*gr_alloc_obj_ctx_fn)(struct channel_gk20a *c, - struct nvgpu_alloc_obj_ctx_args *args); -typedef int (*gr_free_obj_ctx_fn)(struct channel_gk20a *c, - struct nvgpu_free_obj_ctx_args *args); -typedef int (*gr_bind_ctxsw_zcull_fn)(struct gk20a *g, struct gr_gk20a *gr, - struct channel_gk20a *c, u64 zcull_va, - u32 mode); -typedef int (*gr_get_zcull_info_fn)(struct gk20a *g, struct gr_gk20a *gr, - struct gr_zcull_info *zcull_params); - -#define __op_decl(X) gr_##X##_fn X - -struct gpu_gr_ops { - __op_decl(init_fs_state); - __op_decl(access_smpc_reg); - __op_decl(bundle_cb_defaults); - __op_decl(cb_size_default); - __op_decl(calc_global_ctx_buffer_size); - __op_decl(commit_global_attrib_cb); - __op_decl(commit_global_bundle_cb); - __op_decl(commit_global_cb_manager); - __op_decl(commit_global_pagepool); - __op_decl(init_gpc_mmu); - __op_decl(handle_sw_method); - __op_decl(set_alpha_circular_buffer_size); - __op_decl(set_circular_buffer_size); - __op_decl(enable_hww_exceptions); - __op_decl(is_valid_class); - __op_decl(get_sm_dsm_perf_regs); - __op_decl(get_sm_dsm_perf_ctrl_regs); - __op_decl(set_hww_esr_report_mask); - __op_decl(setup_alpha_beta_tables); - __op_decl(falcon_load_ucode); - __op_decl(load_ctxsw_ucode); - __op_decl(get_gpc_tpc_mask); - __op_decl(free_channel_ctx); - __op_decl(alloc_obj_ctx); - __op_decl(free_obj_ctx); - __op_decl(bind_ctxsw_zcull); - __op_decl(get_zcull_info); -}; -#undef __op_decl - -#endif diff --git a/drivers/gpu/nvgpu/nvgpu_gpuid.h b/drivers/gpu/nvgpu/nvgpu_gpuid.h deleted file mode 100644 index 391d286e..00000000 --- a/drivers/gpu/nvgpu/nvgpu_gpuid.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * NVIDIA GPU ID 
functions, definitions. - * - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ -#ifndef _NVGPU_GPUID_H_ -#define _NVGPU_GPUID_H_ - -#define NVGPU_GPU_ARCHITECTURE_SHIFT 4 - -/* constructs unique and compact GPUID from nvgpu_gpu_characteristics - * arch/impl fields */ -#define NVGPU_GPUID(arch, impl) ((u32) ((arch) | (impl))) - -#define NVGPU_GPUID_GK20A \ - NVGPU_GPUID(NVGPU_GPU_ARCH_GK100, NVGPU_GPU_IMPL_GK20A) - -#define NVGPU_GPUID_GM20B \ - NVGPU_GPUID(NVGPU_GPU_ARCH_GM200, NVGPU_GPU_IMPL_GM20B) - -#endif /* _NVGPU_GPU_ID_H_ */ diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index 3e0f9dc3..6f8baa4b 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c @@ -15,7 +15,6 @@ #include "vgpu/vgpu.h" #include "gk20a/hw_gr_gk20a.h" -#include "gr_ops.h" static int vgpu_gr_commit_inst(struct channel_gk20a *c, u64 gpu_va) { @@ -105,7 +104,7 @@ static int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g) gk20a_dbg_fn(""); - attr_buffer_size = g->ops.gr->calc_global_ctx_buffer_size(g); + attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g); gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size); gr->global_ctx_buffer[CIRCULAR].size = cb_buffer_size; @@ -398,7 +397,7 @@ static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, return -EINVAL; } - if (!g->ops.gr->is_valid_class(g, args->class_num)) { + if (!g->ops.gr.is_valid_class(g, args->class_num)) { gk20a_err(dev_from_gk20a(g), "invalid obj class 0x%x", args->class_num); err = 
-EINVAL; @@ -525,9 +524,9 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) &gr->max_tpc_count)) return -ENOMEM; - g->ops.gr->bundle_cb_defaults(g); - g->ops.gr->cb_size_default(g); - g->ops.gr->calc_global_ctx_buffer_size(g); + g->ops.gr.bundle_cb_defaults(g); + g->ops.gr.cb_size_default(g); + g->ops.gr.calc_global_ctx_buffer_size(g); return 0; } @@ -613,7 +612,7 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) if (err) goto clean_up; - err = g->ops.ltc->init_comptags(g, gr); + err = g->ops.ltc.init_comptags(g, gr); if (err) goto clean_up; @@ -678,15 +677,11 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info) return 0; } -static struct gpu_gr_ops vgpu_gr_ops = { - .free_channel_ctx = vgpu_gr_free_channel_ctx, - .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, - .free_obj_ctx = vgpu_gr_free_obj_ctx, - .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, - .get_zcull_info = vgpu_gr_get_zcull_info, -}; - void vgpu_init_gr_ops(struct gpu_ops *gops) { - gops->gr = &vgpu_gr_ops; + gops->gr.free_channel_ctx = vgpu_gr_free_channel_ctx; + gops->gr.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx; + gops->gr.free_obj_ctx = vgpu_gr_free_obj_ctx; + gops->gr.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull; + gops->gr.get_zcull_info = vgpu_gr_get_zcull_info; } diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c index 0a7d19c4..ddff23b7 100644 --- a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c @@ -48,12 +48,8 @@ static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) return 0; } -static const struct gpu_ltc_ops vgpu_ltc_ops = { - .determine_L2_size_bytes = vgpu_determine_L2_size_bytes, - .init_comptags = vgpu_ltc_init_comptags, -}; - void vgpu_init_ltc_ops(struct gpu_ops *gops) { - gops->ltc = &vgpu_ltc_ops; + gops->ltc.determine_L2_size_bytes = vgpu_determine_L2_size_bytes; + gops->ltc.init_comptags = vgpu_ltc_init_comptags; } diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c 
b/drivers/gpu/nvgpu/vgpu/vgpu.c index 11fa73c5..84fd6d18 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c @@ -21,7 +21,6 @@ #include "gk20a/debug_gk20a.h" #include "gk20a/hal_gk20a.h" #include "gk20a/hw_mc_gk20a.h" -#include "nvgpu_gpuid.h" static inline int vgpu_comm_init(struct platform_device *pdev) { @@ -240,7 +239,7 @@ static int vgpu_init_hal(struct gk20a *g) u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl; switch (ver) { - case NVGPU_GPUID_GK20A: + case GK20A_GPUID_GK20A: gk20a_dbg_info("gk20a detected"); /* init gk20a ops then override with virt extensions */ gk20a_init_hal(&g->ops); -- cgit v1.2.2