From 1fd722f592c2e0523c5e399a2406a4e387057188 Mon Sep 17 00:00:00 2001
From: Aingara Paramakuru
Date: Mon, 5 May 2014 21:14:22 -0400
Subject: gpu: nvgpu: support gk20a virtualization

The nvgpu driver now supports using the Tegra graphics virtualization
interfaces to drive gk20a in a virtualized environment.

Bug 1509608

Change-Id: I6ede15ee7bf0b0ad8a13e8eb5f557c3516ead676
Signed-off-by: Aingara Paramakuru
Reviewed-on: http://git-master/r/440122
Reviewed-by: Terje Bergstrom
Tested-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gk20a/Makefile              |   1 +
 drivers/gpu/nvgpu/gk20a/as_gk20a.c            |   5 +-
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c       |  89 ++++++++++----------
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h       |  11 +++
 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c          |  10 ++-
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c          |   9 +-
 drivers/gpu/nvgpu/gk20a/gk20a.c               |  46 ++++++++++-
 drivers/gpu/nvgpu/gk20a/gk20a.h               |  49 +++++++++++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c            |   7 +-
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c            | 115 +++++++++++++++-----------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h            |  35 ++++++++
 drivers/gpu/nvgpu/gk20a/platform_gk20a.h      |   9 ++
 drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c |  64 ++++++++++++++
 13 files changed, 349 insertions(+), 101 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c

diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile
index aa9237b4..fbc9cbec 100644
--- a/drivers/gpu/nvgpu/gk20a/Makefile
+++ b/drivers/gpu/nvgpu/gk20a/Makefile
@@ -39,5 +39,6 @@ nvgpu-y := \
 	tsg_gk20a.o
 nvgpu-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o
 nvgpu-$(CONFIG_SYNC) += sync_gk20a.o
+nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += platform_vgpu_tegra.o
 
 obj-$(CONFIG_GK20A) := nvgpu.o
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 4849dbd5..1a1ca8ff 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -40,6 +40,7 @@ static void release_as_share_id(struct gk20a_as *as, int id)
 static int gk20a_as_alloc_share(struct gk20a_as *as,
 				struct gk20a_as_share **out)
 {
+	struct gk20a *g = gk20a_from_as(as);
 	struct gk20a_as_share *as_share;
 	int err = 0;
 
@@ -55,7 +56,7 @@ static int gk20a_as_alloc_share(struct gk20a_as *as,
 	as_share->ref_cnt.counter = 1;
 
 	/* this will set as_share->vm. */
-	err = gk20a_vm_alloc_share(as_share);
+	err = g->ops.mm.vm_alloc_share(as_share);
 	if (err)
 		goto failed;
 
@@ -106,7 +107,7 @@ static int gk20a_as_ioctl_bind_channel(
 	atomic_inc(&as_share->ref_cnt);
 
 	/* this will set channel_gk20a->vm */
-	err = gk20a_vm_bind_channel(as_share, ch);
+	err = ch->g->ops.mm.vm_bind_channel(as_share, ch);
 	if (err) {
 		atomic_dec(&as_share->ref_cnt);
 		return err;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 45757884..669ec294 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -56,16 +56,9 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
-static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
-			u64 gpfifo_base, u32 gpfifo_entries);
 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
-static int channel_gk20a_alloc_inst(struct gk20a *g,
-			struct channel_gk20a *ch);
-static void channel_gk20a_free_inst(struct gk20a *g,
-			struct channel_gk20a *ch);
-
 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add);
 
 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
@@ -173,12 +166,10 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 		return -ENOMEM;
 
 	/* disable channel */
-	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
-		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
-		ccsr_channel_enable_clr_true_f());
+	c->g->ops.fifo.disable_channel(c);
 
 	/* preempt the channel */
-	WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));
+	WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
 
 	/* value field is 8 bits long */
 	while (value >= 1 << 8) {
@@ -206,8 +197,8 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 	return 0;
 }
 
-static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
-			u64 gpfifo_base, u32 gpfifo_entries)
+int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
+			u64 gpfifo_base, u32 gpfifo_entries)
 {
 	void *inst_ptr;
 
@@ -269,7 +260,7 @@ static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
 	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
 
-	return 0;
+	return channel_gk20a_commit_userd(c);
 }
 
 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
@@ -347,8 +338,7 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
 	}
 }
 
-static int channel_gk20a_alloc_inst(struct gk20a *g,
-			struct channel_gk20a *ch)
+int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
 {
 	struct device *d = dev_from_gk20a(g);
 	int err = 0;
 
@@ -384,12 +374,11 @@ static int channel_gk20a_alloc_inst(struct gk20a *g,
 
 clean_up:
 	gk20a_err(d, "fail");
-	channel_gk20a_free_inst(g, ch);
+	g->ops.fifo.free_inst(g, ch);
 	return err;
 }
 
-static void channel_gk20a_free_inst(struct gk20a *g,
-			struct channel_gk20a *ch)
+void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
 {
 	struct device *d = dev_from_gk20a(g);
 
@@ -403,7 +392,16 @@ static void channel_gk20a_free_inst(struct gk20a *g,
 
 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
 {
-	return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
+	return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
+}
+
+void channel_gk20a_disable(struct channel_gk20a *ch)
+{
+	/* disable channel */
+	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
+		gk20a_readl(ch->g,
+			ccsr_channel_r(ch->hw_chid)) |
+		ccsr_channel_enable_clr_true_f());
 }
 
 void gk20a_channel_abort(struct channel_gk20a *ch)
@@ -426,11 +424,7 @@
 	}
 	mutex_unlock(&ch->jobs_lock);
 
-	/* disable channel */
-	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
-		gk20a_readl(ch->g,
-			ccsr_channel_r(ch->hw_chid)) |
-		ccsr_channel_enable_clr_true_f());
+	ch->g->ops.fifo.disable_channel(ch);
 
 	if (released_job_semaphore) {
 		wake_up_interruptible_all(&ch->semaphore_wq);
@@ -479,7 +473,7 @@ void gk20a_disable_channel(struct channel_gk20a *ch,
 	gk20a_wait_channel_idle(ch);
 
 	/* preempt the channel */
-	gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);
+	ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);
 
 	/* remove channel from runlist */
 	channel_gk20a_update_runlist(ch, false);
@@ -643,7 +637,7 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
 	gk20a_free_error_notifiers(ch);
 
 	/* release channel ctx */
-	gk20a_free_channel_ctx(ch);
+	g->ops.gr.free_channel_ctx(ch);
 
 	gk20a_gr_flush_channel_tlb(gr);
 
@@ -683,8 +677,8 @@ unbind:
 	if (gk20a_is_channel_marked_as_tsg(ch))
 		gk20a_tsg_unbind_channel(ch);
 
-	channel_gk20a_unbind(ch);
-	channel_gk20a_free_inst(g, ch);
+	g->ops.fifo.unbind_channel(ch);
+	g->ops.fifo.free_inst(g, ch);
 
 	ch->vpr = false;
 	ch->vm = NULL;
@@ -747,7 +741,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 
 	ch->g = g;
 
-	if (channel_gk20a_alloc_inst(g, ch)) {
+	if (g->ops.fifo.alloc_inst(g, ch)) {
 		ch->in_use = false;
 		gk20a_err(dev_from_gk20a(g),
 			"failed to open gk20a channel, out of inst mem");
@@ -1097,7 +1091,6 @@ static void recycle_priv_cmdbuf(struct channel_gk20a *c)
 	gk20a_dbg_fn("done");
 }
 
-
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 		struct nvhost_alloc_gpfifo_args *args)
 {
@@ -1181,10 +1174,11 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 	gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
 		c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
 
-	channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
 	channel_gk20a_setup_userd(c);
-	channel_gk20a_commit_userd(c);
+
+	err = g->ops.fifo.setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
+	if (err)
+		goto clean_up_unmap;
 
 	/* TBD: setup engine contexts */
 
@@ -1550,7 +1544,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	/* We don't know what context is currently running... */
 	/* Note also: there can be more than one context associated with the */
 	/* address space (vm). */
-	gk20a_mm_tlb_invalidate(c->vm);
+	g->ops.mm.tlb_invalidate(c->vm);
 
 	/* Make sure we have enough space for gpfifo entries. If not,
 	 * wait for signals from completed submits */
@@ -1929,7 +1923,7 @@ static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
 
 	gk20a_dbg_fn("");
 
-	return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
+	return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
 				args->gpu_va, args->mode);
 }
 
@@ -1945,7 +1939,7 @@ int gk20a_channel_suspend(struct gk20a *g)
 	gk20a_dbg_fn("");
 
 	/* wait for engine idle */
-	err = gk20a_fifo_wait_engine_idle(g);
+	err = g->ops.fifo.wait_engine_idle(g);
 	if (err)
 		return err;
 
@@ -1954,22 +1948,20 @@ int gk20a_channel_suspend(struct gk20a *g)
 			gk20a_dbg_info("suspend channel %d", chid);
 			/* disable channel */
-			gk20a_writel(g, ccsr_channel_r(chid),
-				gk20a_readl(g, ccsr_channel_r(chid)) |
-				ccsr_channel_enable_clr_true_f());
+			g->ops.fifo.disable_channel(&f->channel[chid]);
 			/* preempt the channel */
-			gk20a_fifo_preempt_channel(g, chid);
+			g->ops.fifo.preempt_channel(g, chid);
 
 			channels_in_use = true;
 		}
 	}
 
 	if (channels_in_use) {
-		gk20a_fifo_update_runlist(g, 0, ~0, false, true);
+		g->ops.fifo.update_runlist(g, 0, ~0, false, true);
 
 		for (chid = 0; chid < f->num_channels; chid++) {
 			if (f->channel[chid].in_use)
-				channel_gk20a_unbind(&f->channel[chid]);
+				g->ops.fifo.unbind_channel(&f->channel[chid]);
 		}
 	}
 
@@ -1996,7 +1988,7 @@ int gk20a_channel_resume(struct gk20a *g)
 	}
 
 	if (channels_in_use)
-		gk20a_fifo_update_runlist(g, 0, ~0, true, true);
+		g->ops.fifo.update_runlist(g, 0, ~0, true, true);
 
 	gk20a_dbg_fn("done");
 	return 0;
@@ -2074,6 +2066,11 @@ clean_up:
 void gk20a_init_channel(struct gpu_ops *gops)
 {
 	gops->fifo.bind_channel = channel_gk20a_bind;
+	gops->fifo.unbind_channel = channel_gk20a_unbind;
+	gops->fifo.disable_channel = channel_gk20a_disable;
+	gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
+	gops->fifo.free_inst = channel_gk20a_free_inst;
+	gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
 }
 
 long gk20a_channel_ioctl(struct file *filp,
@@ -2144,7 +2141,7 @@ long gk20a_channel_ioctl(struct file *filp,
 				__func__, cmd);
 			return err;
 		}
-		err = gk20a_alloc_obj_ctx(ch,
+		err = ch->g->ops.gr.alloc_obj_ctx(ch,
 				(struct nvhost_alloc_obj_ctx_args *)buf);
 		gk20a_idle(dev);
 		break;
@@ -2156,7 +2153,7 @@ long gk20a_channel_ioctl(struct file *filp,
 				__func__, cmd);
 			return err;
 		}
-		err = gk20a_free_obj_ctx(ch,
+		err = ch->g->ops.gr.free_obj_ctx(ch,
 				(struct nvhost_free_obj_ctx_args *)buf);
 		gk20a_idle(dev);
 		break;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 2ea3eccb..37ca8244 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -144,6 +144,10 @@ struct channel_gk20a {
 	void *error_notifier_va;
 
 	struct gk20a_channel_sync *sync;
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	u64 virt_ctx;
+#endif
 };
 
 static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
@@ -193,4 +197,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 		struct nvhost_alloc_gpfifo_args *args);
 
+void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
+void channel_gk20a_disable(struct channel_gk20a *ch);
+int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch);
+void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch);
+int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
+		u64 gpfifo_base, u32 gpfifo_entries);
+
 #endif /*__CHANNEL_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index e5628c3f..7338f842 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -158,6 +158,9 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 	struct zbc_entry *zbc_val;
 	struct zbc_query_params *zbc_tbl;
 	int i, err = 0;
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	struct gk20a_platform *platform = platform_get_drvdata(dev);
+#endif
 
 	gk20a_dbg_fn("");
 
@@ -197,7 +200,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		if (zcull_info == NULL)
 			return -ENOMEM;
 
-		err = gr_gk20a_get_zcull_info(g, &g->gr, zcull_info);
+		err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info);
 		if (err) {
 			kfree(zcull_info);
 			break;
 		}
@@ -219,6 +222,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 	case NVHOST_GPU_IOCTL_ZBC_SET_TABLE:
 		set_table_args = (struct nvhost_gpu_zbc_set_table_args *)buf;
 
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+		if (platform->virtual_dev)
+			return -ENOMEM;
+#endif
+
 		zbc_val = kzalloc(sizeof(struct zbc_entry), GFP_KERNEL);
 		if (zbc_val == NULL)
 			return -ENOMEM;
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 4363129d..e6b3fd5f 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1173,7 +1173,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
 		gk20a_channel_abort(ch);
 
 		for (i = 0; i < g->fifo.max_runlists; i++)
-			gk20a_fifo_update_runlist(g, i,
+			g->ops.fifo.update_runlist(g, i,
 					hw_chid, false, false);
 
 		if (gk20a_fifo_set_ctx_mmu_error(g, ch))
@@ -1620,7 +1620,7 @@ int gk20a_fifo_disable_engine_activity(struct gk20a *g,
 		pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat);
 
 	if (pbdma_chid != ~0) {
-		err = gk20a_fifo_preempt_channel(g, pbdma_chid);
+		err = g->ops.fifo.preempt_channel(g, pbdma_chid);
 		if (err)
 			goto clean_up;
 	}
@@ -1636,7 +1636,7 @@ int gk20a_fifo_disable_engine_activity(struct gk20a *g,
 		engine_chid = fifo_engine_status_next_id_v(eng_stat);
 
 	if (engine_chid != ~0 && engine_chid != pbdma_chid) {
-		err = gk20a_fifo_preempt_channel(g, engine_chid);
+		err = g->ops.fifo.preempt_channel(g, engine_chid);
 		if (err)
 			goto clean_up;
 	}
@@ -1960,6 +1960,9 @@ static void gk20a_fifo_apply_pb_timeout(struct gk20a *g)
 void gk20a_init_fifo(struct gpu_ops *gops)
 {
 	gk20a_init_channel(gops);
+	gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
+	gops->fifo.update_runlist = gk20a_fifo_update_runlist;
 	gops->fifo.trigger_mmu_fault = gk20a_fifo_trigger_mmu_fault;
 	gops->fifo.apply_pb_timeout = gk20a_fifo_apply_pb_timeout;
+	gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 0816878a..3499cc89 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -40,6 +40,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -57,6 +58,9 @@
 #include "dbg_gpu_gk20a.h"
 #include "hal.h"
 #include "nvhost_acm.h"
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+#include "vgpu/vgpu.h"
+#endif
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/gk20a.h>
@@ -737,6 +741,17 @@ static int gk20a_init_client(struct platform_device *dev)
 
 	gk20a_dbg_fn("");
 
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	{
+		struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+		if (platform->virtual_dev) {
+			err = vgpu_pm_finalize_poweron(&dev->dev);
+			if (err)
+				return err;
+		}
+	}
+#endif
 #ifndef CONFIG_PM_RUNTIME
 	gk20a_pm_finalize_poweron(&dev->dev);
 #endif
@@ -753,6 +768,16 @@
 static void gk20a_deinit_client(struct platform_device *dev)
 {
 	gk20a_dbg_fn("");
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	{
+		struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+		if (platform->virtual_dev) {
+			vgpu_pm_prepare_poweroff(&dev->dev);
+			return;
+		}
+	}
+#endif
 #ifndef CONFIG_PM_RUNTIME
 	gk20a_pm_prepare_poweroff(&dev->dev);
 #endif
@@ -1006,6 +1031,10 @@ static struct of_device_id tegra_gk20a_of_match[] = {
 	  .data = &gk20a_tegra_platform },
 	{ .compatible = "nvidia,tegra210-gm20b",
 	  .data = &gm20b_tegra_platform },
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	{ .compatible = "nvidia,tegra124-gk20a-vgpu",
+	  .data = &vgpu_tegra_platform },
+#endif
 #else
 	{ .compatible = "nvidia,tegra124-gk20a",
 	  .data = &gk20a_generic_platform },
@@ -1057,7 +1086,7 @@ static int gk20a_create_device(
 	return 0;
 }
 
-static void gk20a_user_deinit(struct platform_device *dev)
+void gk20a_user_deinit(struct platform_device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
 
@@ -1098,7 +1127,7 @@ static void gk20a_user_deinit(struct platform_device *dev)
 	class_destroy(g->class);
 }
 
-static int gk20a_user_init(struct platform_device *dev)
+int gk20a_user_init(struct platform_device *dev)
 {
 	int err;
 	dev_t devno;
@@ -1403,6 +1432,11 @@ static int gk20a_probe(struct platform_device *dev)
 
 	platform_set_drvdata(dev, platform);
 
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	if (platform->virtual_dev)
+		return vgpu_probe(dev);
+#endif
+
 	gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
 	if (!gk20a) {
 		dev_err(&dev->dev, "couldn't allocate gk20a support");
@@ -1546,8 +1580,16 @@
 static int __exit gk20a_remove(struct platform_device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+#endif
 	gk20a_dbg_fn("");
 
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	if (platform->virtual_dev)
+		return vgpu_remove(dev);
+#endif
+
 #ifdef CONFIG_INPUT_CFBOOST
 	if (g->boost_added)
 		cfb_remove_device(&dev->dev);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a1080f0b..b813541a 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -131,6 +131,16 @@ struct gpu_ops {
 				    u32 reg_offset);
 		int (*load_ctxsw_ucode)(struct gk20a *g);
 		u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
+		void (*free_channel_ctx)(struct channel_gk20a *c);
+		int (*alloc_obj_ctx)(struct channel_gk20a *c,
+				struct nvhost_alloc_obj_ctx_args *args);
+		int (*free_obj_ctx)(struct channel_gk20a *c,
+				struct nvhost_free_obj_ctx_args *args);
+		int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr,
+				struct channel_gk20a *c, u64 zcull_va,
+				u32 mode);
+		int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr,
+				struct gr_zcull_info *zcull_params);
 	} gr;
 	const char *name;
 	struct {
@@ -148,9 +158,20 @@ struct gpu_ops {
 	} clock_gating;
 	struct {
 		void (*bind_channel)(struct channel_gk20a *ch_gk20a);
+		void (*unbind_channel)(struct channel_gk20a *ch_gk20a);
+		void (*disable_channel)(struct channel_gk20a *ch);
+		int (*alloc_inst)(struct gk20a *g, struct channel_gk20a *ch);
+		void (*free_inst)(struct gk20a *g, struct channel_gk20a *ch);
+		int (*setup_ramfc)(struct channel_gk20a *c, u64 gpfifo_base,
+				u32 gpfifo_entries);
+		int (*preempt_channel)(struct gk20a *g, u32 hw_chid);
+		int (*update_runlist)(struct gk20a *g, u32 runlist_id,
+				u32 hw_chid, bool add,
+				bool wait_for_finish);
 		void (*trigger_mmu_fault)(struct gk20a *g,
 				unsigned long engine_ids);
 		void (*apply_pb_timeout)(struct gk20a *g);
+		int (*wait_engine_idle)(struct gk20a *g);
 	} fifo;
 	struct pmu_v {
 	/*used for change of enum zbc update cmd id from ver 0 to ver1*/
@@ -241,6 +262,31 @@ struct gpu_ops {
 		void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr,
 			       u64 size, u32 pgsz_idx);
 		bool (*is_debug_mode_enabled)(struct gk20a *g);
+		u64 (*gmmu_map)(struct vm_gk20a *vm,
+				u64 map_offset,
+				struct sg_table *sgt,
+				u64 buffer_offset,
+				u64 size,
+				int pgsz_idx,
+				u8 kind_v,
+				u32 ctag_offset,
+				u32 flags,
+				int rw_flag,
+				bool clear_ctags);
+		void (*gmmu_unmap)(struct vm_gk20a *vm,
+				u64 vaddr,
+				u64 size,
+				int pgsz_idx,
+				bool va_allocated,
+				int rw_flag);
+		void (*vm_remove)(struct vm_gk20a *vm);
+		int (*vm_alloc_share)(struct gk20a_as_share *as_share);
+		int (*vm_bind_channel)(struct gk20a_as_share *as_share,
+				struct channel_gk20a *ch);
+		int (*fb_flush)(struct gk20a *g);
+		void (*l2_invalidate)(struct gk20a *g);
+		void (*l2_flush)(struct gk20a *g, bool invalidate);
+		void (*tlb_invalidate)(struct vm_gk20a *vm);
 	} mm;
 	struct {
 		int (*prepare_ucode)(struct gk20a *g);
@@ -648,4 +694,7 @@ gk20a_request_firmware(struct gk20a *g, const char *fw_name);
 
 int gk20a_init_gpu_characteristics(struct gk20a *g);
 
+int gk20a_user_init(struct platform_device *dev);
+void gk20a_user_deinit(struct platform_device *dev);
+
 #endif /* _NVHOST_GK20A_H_ */
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index ef7776df..892a138e 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -825,7 +825,7 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
 		}
 	}
 
-	gk20a_mm_fb_flush(g);
+	g->ops.mm.fb_flush(g);
 
 	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0,
 		 ch_ctx->zcull_ctx.ctx_sw_mode);
@@ -7077,4 +7077,9 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments;
 	gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
 	gops->gr.get_gpc_tpc_mask = gr_gk20a_get_gpc_tpc_mask;
+	gops->gr.free_channel_ctx = gk20a_free_channel_ctx;
+	gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx;
+	gops->gr.free_obj_ctx = gk20a_free_obj_ctx;
+	gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull;
+	gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 654938b2..3feb675b 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -88,7 +88,6 @@ static inline u32 lo32(u64 f)
 	return (u32)(f & 0xffffffff);
 }
 
-static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
 static struct mapped_buffer_node *find_mapped_buffer_locked(
 					struct rb_root *root, u64 addr);
 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
@@ -100,7 +99,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   u64 first_vaddr, u64 last_vaddr,
 				   u8 kind_v, u32 ctag_offset, bool cacheable,
 				   int rw_flag);
-static void gk20a_vm_remove_support(struct vm_gk20a *vm);
 static int gk20a_init_system_vm(struct mm_gk20a *mm);
 static int gk20a_init_bar1_vm(struct mm_gk20a *mm);
 
@@ -335,6 +333,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 	gk20a_init_bar1_vm(mm);
 	gk20a_init_system_vm(mm);
 
+	/* set vm_alloc_share op here as gk20a_as_alloc_share needs it */
+	g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share;
 	mm->remove_support = gk20a_remove_mm_support;
 	mm->sw_ready = true;
 
@@ -833,9 +833,9 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
 	mutex_unlock(&vm->update_gmmu_lock);
 }
 
-static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
-			     u64 size,
-			     enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
+u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
+		      u64 size,
+		      enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 {
 	struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
 
@@ -881,9 +881,9 @@ static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 	return offset;
 }
 
-static int gk20a_vm_free_va(struct vm_gk20a *vm,
-			    u64 offset, u64 size,
-			    enum gmmu_pgsz_gk20a pgsz_idx)
+int gk20a_vm_free_va(struct vm_gk20a *vm,
+		     u64 offset, u64 size,
+		     enum gmmu_pgsz_gk20a pgsz_idx)
 {
 	struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
 	u32 page_size = gmmu_page_sizes[pgsz_idx];
 
@@ -1100,21 +1100,32 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 	return 0;
 }
 
-static u64 __locked_gmmu_map(struct vm_gk20a *vm,
-				u64 map_offset,
-				struct sg_table *sgt,
-				u64 buffer_offset,
-				u64 size,
-				int pgsz_idx,
-				u8 kind_v,
-				u32 ctag_offset,
-				u32 flags,
-				int rw_flag)
+u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
+			u64 map_offset,
+			struct sg_table *sgt,
+			u64 buffer_offset,
+			u64 size,
+			int pgsz_idx,
+			u8 kind_v,
+			u32 ctag_offset,
+			u32 flags,
+			int rw_flag,
+			bool clear_ctags)
 {
 	int err = 0, i = 0;
 	bool allocated = false;
 	u32 pde_lo, pde_hi;
 	struct device *d = dev_from_vm(vm);
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	if (clear_ctags && ctag_offset) {
+		u32 ctag_lines = ALIGN(size, COMP_TAG_LINE_SIZE) >>
+					COMP_TAG_LINE_SIZE_SHIFT;
+
+		/* init/clear the ctag buffer */
+		g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+				ctag_offset, ctag_offset + ctag_lines - 1);
+	}
 
 	/* Allocate (or validate when map_offset != 0) the virtual address. */
 	if (!map_offset) {
@@ -1167,12 +1178,12 @@ fail_alloc:
 	return 0;
 }
 
-static void __locked_gmmu_unmap(struct vm_gk20a *vm,
-				u64 vaddr,
-				u64 size,
-				int pgsz_idx,
-				bool va_allocated,
-				int rw_flag)
+void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
+			u64 vaddr,
+			u64 size,
+			int pgsz_idx,
+			bool va_allocated,
+			int rw_flag)
 {
 	int err = 0;
 	struct gk20a *g = gk20a_from_vm(vm);
 
@@ -1298,6 +1309,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	struct buffer_attrs bfr = {0};
 	struct gk20a_comptags comptags;
 	u64 buf_addr;
+	bool clear_ctags = false;
 
 	mutex_lock(&vm->update_gmmu_lock);
 
@@ -1402,11 +1414,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			bfr.kind_v = bfr.uc_kind_v;
 		} else {
 			gk20a_get_comptags(d, dmabuf, &comptags);
-
-			/* init/clear the ctag buffer */
-			g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
-					  comptags.offset,
-					  comptags.offset + comptags.lines - 1);
+			clear_ctags = true;
 		}
 	}
 
@@ -1414,15 +1422,15 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	bfr.ctag_offset = comptags.offset;
 
 	/* update gmmu ptes */
-	map_offset = __locked_gmmu_map(vm, map_offset,
+	map_offset = g->ops.mm.gmmu_map(vm, map_offset,
 					bfr.sgt,
 					buffer_offset, /* sg offset */
 					mapping_size,
 					bfr.pgsz_idx,
 					bfr.kind_v,
 					bfr.ctag_offset,
-					flags, rw_flag);
-
+					flags, rw_flag,
+					clear_ctags);
 	if (!map_offset)
 		goto clean_up;
 
@@ -1531,17 +1539,18 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		u32 flags,
 		int rw_flag)
 {
+	struct gk20a *g = gk20a_from_vm(vm);
 	u64 vaddr;
 
 	mutex_lock(&vm->update_gmmu_lock);
-	vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
+	vaddr = g->ops.mm.gmmu_map(vm, 0, /* already mapped? - No */
 				*sgt, /* sg table */
 				0, /* sg offset */
 				size,
 				0, /* page size index = 0 i.e. SZ_4K */
 				0, /* kind */
 				0, /* ctag_offset */
-				flags, rw_flag);
+				flags, rw_flag, false);
 	mutex_unlock(&vm->update_gmmu_lock);
 	if (!vaddr) {
 		gk20a_err(dev_from_vm(vm), "failed to allocate va space");
 	}
@@ -1549,7 +1558,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 
 	/* Invalidate kernel mappings immediately */
-	gk20a_mm_tlb_invalidate(vm);
+	g->ops.mm.tlb_invalidate(vm);
 
 	return vaddr;
 }
 
@@ -1573,8 +1582,10 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm,
 		u64 size,
 		int rw_flag)
 {
+	struct gk20a *g = gk20a_from_vm(vm);
+
 	mutex_lock(&vm->update_gmmu_lock);
-	__locked_gmmu_unmap(vm,
+	g->ops.mm.gmmu_unmap(vm,
 			vaddr,
 			size,
 			0, /* page size 4K */
@@ -1970,10 +1981,10 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
 	}
 
 	for (i = 0; i < num_pages; i++) {
-		u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
+		u64 page_vaddr = g->ops.mm.gmmu_map(vm, vaddr,
 			vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0,
 			NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
-			gk20a_mem_flag_none);
+			gk20a_mem_flag_none, false);
 
 		if (!page_vaddr) {
 			gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!");
 			goto err_unmap;
 		}
@@ -1990,7 +2001,7 @@ err_unmap:
 	/* something went wrong. unmap pages */
 	while (i--) {
 		vaddr -= pgsz;
-		__locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
+		g->ops.mm.gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
 				    gk20a_mem_flag_none);
 	}
 
@@ -2005,12 +2016,14 @@ static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
 			   u64 size, u32 pgsz_idx)
 {
-	__locked_gmmu_unmap(vm, vaddr, size, pgsz_idx,
-			false, gk20a_mem_flag_none);
+	struct gk20a *g = vm->mm->g;
+
+	g->ops.mm.gmmu_unmap(vm, vaddr, size, pgsz_idx,
+			false, gk20a_mem_flag_none);
 }
 
 /* NOTE! mapped_buffers lock must be held */
-static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
+void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 {
 	struct vm_gk20a *vm = mapped_buffer->vm;
 	struct gk20a *g = vm->mm->g;
 
@@ -2026,7 +2039,7 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 		if (g->ops.mm.put_empty) {
 			g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx);
 		} else {
-			__locked_gmmu_unmap(vm,
+			g->ops.mm.gmmu_unmap(vm,
 				mapped_buffer->addr,
 				mapped_buffer->size,
 				mapped_buffer->pgsz_idx,
@@ -2036,7 +2049,7 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 				num_pages, pgsz_idx, false);
 		}
 	} else
-		__locked_gmmu_unmap(vm,
+		g->ops.mm.gmmu_unmap(vm,
 			mapped_buffer->addr,
 			mapped_buffer->size,
 			mapped_buffer->pgsz_idx,
@@ -2085,7 +2098,7 @@ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
 	mutex_unlock(&vm->update_gmmu_lock);
 }
 
-static void gk20a_vm_remove_support(struct vm_gk20a *vm)
+void gk20a_vm_remove_support(struct vm_gk20a *vm)
 {
 	struct gk20a *g = vm->mm->g;
 	struct mapped_buffer_node *mapped_buffer;
 
@@ -2156,7 +2169,8 @@ static void gk20a_vm_remove_support(struct vm_gk20a *vm)
 static void gk20a_vm_remove_support_kref(struct kref *ref)
 {
 	struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
-	gk20a_vm_remove_support(vm);
+	struct gk20a *g = gk20a_from_vm(vm);
+	g->ops.mm.vm_remove(vm);
 }
 
 void gk20a_vm_get(struct vm_gk20a *vm)
@@ -3124,5 +3138,14 @@ void gk20a_init_mm(struct gpu_ops *gops)
 	gops->mm.put_empty = gk20a_vm_put_empty;
 	gops->mm.clear_sparse = gk20a_vm_clear_sparse;
 	gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled;
+	gops->mm.gmmu_map = gk20a_locked_gmmu_map;
+	gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
+	gops->mm.vm_remove = gk20a_vm_remove_support;
+	gops->mm.vm_alloc_share = gk20a_vm_alloc_share;
+	gops->mm.vm_bind_channel = gk20a_vm_bind_channel;
+	gops->mm.fb_flush = gk20a_mm_fb_flush;
+	gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
+	gops->mm.l2_flush = gk20a_mm_l2_flush;
+	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index b8726c62..f06c465a 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -318,6 +318,10 @@ struct vm_gk20a {
 	dma_addr_t zero_page_iova;
 	void *zero_page_cpuva;
 	struct sg_table *zero_page_sgt;
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	u64 handle;
+#endif
 };
 
 struct gk20a;
@@ -438,11 +442,30 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		u32 flags,
 		int rw_flag);
 
+u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
+			u64 map_offset,
+			struct sg_table *sgt,
+			u64 buffer_offset,
+			u64 size,
+			int pgsz_idx,
+			u8 kind_v,
+			u32 ctag_offset,
+			u32 flags,
+			int rw_flag,
+			bool clear_ctags);
+
 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
 		u64 vaddr,
 		u64 size,
 		int rw_flag);
 
+void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
+			u64 vaddr,
+			u64 size,
+			int pgsz_idx,
+			bool va_allocated,
+			int rw_flag);
+
 struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
 void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
 		    struct sg_table *sgt);
@@ -461,6 +484,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 /* unmap handle from kernel */
 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
 
+void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
+
 /* get reference to all currently mapped buffers */
 int gk20a_vm_get_buffers(struct vm_gk20a *vm,
 			 struct mapped_buffer_node ***mapped_buffers,
@@ -482,6 +507,16 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
 void gk20a_vm_get(struct vm_gk20a *vm);
 void gk20a_vm_put(struct vm_gk20a *vm);
 
+void gk20a_vm_remove_support(struct vm_gk20a *vm);
+
+u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
+		      u64 size,
+		      enum gmmu_pgsz_gk20a gmmu_pgsz_idx);
+
+int gk20a_vm_free_va(struct vm_gk20a *vm,
+		     u64 offset, u64 size,
+		     enum gmmu_pgsz_gk20a pgsz_idx);
+
 /* vm-as interface */
 struct nvhost_as_alloc_space_args;
 struct nvhost_as_free_space_args;
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
index 6dd0c0db..e6ed9898 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -151,6 +151,12 @@ struct gk20a_platform {
 	 * of the CPU.
 	 */
 	void (*dump_platform_dependencies)(struct platform_device *dev);
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	bool virtual_dev;
+	u64 virt_handle;
+	struct task_struct *intr_handler;
+#endif
 };
 
 static inline struct gk20a_platform *gk20a_get_platform(
@@ -163,6 +169,9 @@ extern struct gk20a_platform gk20a_generic_platform;
 #ifdef CONFIG_TEGRA_GK20A
 extern struct gk20a_platform gk20a_tegra_platform;
 extern struct gk20a_platform gm20b_tegra_platform;
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+extern struct gk20a_platform vgpu_tegra_platform;
+#endif
 #endif
 
 static inline bool gk20a_platform_has_syncpoints(struct platform_device *dev)
diff --git a/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c
new file mode 100644
index 00000000..ea4fde79
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c
@@ -0,0 +1,64 @@
+/*
+ * Tegra Virtualized GPU Platform Interface
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/of_platform.h>
+
+#include "gk20a.h"
+#include "hal_gk20a.h"
+#include "platform_gk20a.h"
+
+static int gk20a_tegra_probe(struct platform_device *dev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct device_node *np = dev->dev.of_node;
+	const __be32 *host1x_ptr;
+	struct platform_device *host1x_pdev = NULL;
+
+	host1x_ptr = of_get_property(np, "nvidia,host1x", NULL);
+	if (host1x_ptr) {
+		struct device_node *host1x_node =
+			of_find_node_by_phandle(be32_to_cpup(host1x_ptr));
+
+		host1x_pdev = of_find_device_by_node(host1x_node);
+		if (!host1x_pdev) {
+			dev_warn(&dev->dev, "host1x device not available");
+			return -EPROBE_DEFER;
+		}
+
+	} else {
+		host1x_pdev = to_platform_device(dev->dev.parent);
+		dev_warn(&dev->dev, "host1x reference not found. assuming host1x to be parent");
+	}
+
+	platform->g->host1x_dev = host1x_pdev;
+
+	return 0;
+}
+
+struct gk20a_platform vgpu_tegra_platform = {
+	.has_syncpoints = true,
+
+	/* power management configuration */
+	.can_railgate = false,
+	.enable_slcg = false,
+	.enable_blcg = false,
+	.enable_elcg = false,
+	.enable_elpg = false,
+	.enable_aelpg = false,
+
+	.probe = gk20a_tegra_probe,
+
+	.virtual_dev = true,
+};
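
The thread running through every hunk above is the same refactor: common code
no longer calls the gk20a functions directly, it dispatches through per-engine
function pointers in struct gpu_ops (fifo, gr, mm), so a virtualized backend
can install implementations that forward requests over the Tegra
virtualization interface instead of touching registers. The following is a
minimal, self-contained sketch of that dispatch pattern; every name in it is
an illustrative stand-in, not nvgpu's.

/* ops_sketch.c - illustrative ops-table dispatch, not nvgpu code */
#include <stdio.h>

struct chan { int id; };

/* per-engine ops table; real driver keeps one per struct gpu_ops member */
struct fifo_ops {
	int (*preempt_channel)(struct chan *ch);
	void (*disable_channel)(struct chan *ch);
};

/* native backend: the real driver would poke ccsr/fifo registers here */
static int native_preempt(struct chan *ch)
{
	printf("native: preempt channel %d via registers\n", ch->id);
	return 0;
}

static void native_disable(struct chan *ch)
{
	printf("native: disable channel %d via registers\n", ch->id);
}

/* virtual backend: would forward the request to the hypervisor/server */
static int vgpu_preempt(struct chan *ch)
{
	printf("vgpu: send preempt(%d) over the virt interface\n", ch->id);
	return 0;
}

static void vgpu_disable(struct chan *ch)
{
	printf("vgpu: send disable(%d) over the virt interface\n", ch->id);
}

/* probe-time selection, analogous to gk20a_init_fifo() vs a vgpu init */
static void init_fifo_ops(struct fifo_ops *ops, int virtual_dev)
{
	if (virtual_dev) {
		ops->preempt_channel = vgpu_preempt;
		ops->disable_channel = vgpu_disable;
	} else {
		ops->preempt_channel = native_preempt;
		ops->disable_channel = native_disable;
	}
}

int main(void)
{
	struct fifo_ops ops;
	struct chan ch = { .id = 3 };

	init_fifo_ops(&ops, 1);		/* pretend platform->virtual_dev */
	ops.disable_channel(&ch);	/* dispatches to the vgpu backend */
	return ops.preempt_channel(&ch);
}

Callers such as gk20a_channel_suspend() stay backend-agnostic this way, which
is why the patch routes even internal helpers through g->ops.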
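On the mm side, comptag clearing moves into the map path: gk20a_locked_gmmu_map()
takes a clear_ctags flag and derives the number of comptag lines spanned by the
mapping before issuing the cbc_ctrl clear. A standalone sketch of that line
arithmetic follows; COMP_TAG_LINE_SIZE here is an assumed example value for
illustration, not the hardware's.

/* ctag_sketch.c - illustrative comptag-line arithmetic, assumed constants */
#include <stdint.h>
#include <stdio.h>

#define COMP_TAG_LINE_SIZE_SHIFT 17	/* assumed: 128 KiB per comptag line */
#define COMP_TAG_LINE_SIZE (1u << COMP_TAG_LINE_SIZE_SHIFT)
/* round x up to a multiple of a (a must be a power of two) */
#define ALIGN_UP(x, a) (((x) + ((a) - 1)) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t size = 300 << 10;	/* 300 KiB buffer being mapped */
	uint32_t ctag_offset = 8;	/* first comptag line of the buffer */

	/* mirrors: ALIGN(size, COMP_TAG_LINE_SIZE) >> COMP_TAG_LINE_SIZE_SHIFT */
	uint32_t ctag_lines = ALIGN_UP(size, COMP_TAG_LINE_SIZE) >>
				COMP_TAG_LINE_SIZE_SHIFT;

	/* the driver would now issue a cbc_ctrl "clear" over this range */
	printf("clear comptag lines %u..%u\n",
	       ctag_offset, ctag_offset + ctag_lines - 1);	/* 8..10 */
	return 0;
}

Deferring the clear until the map is committed lets the vgpu backend skip it
entirely (its gmmu_map implementation handles compression server-side), which
is the reason the flag exists.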