diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2017-11-14 09:43:28 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-11-17 11:27:19 -0500 |
commit | b42fb7ba26b565f93118fbdd9e17b42ee6144c5e (patch) | |
tree | 26e2d919f019d15b51bba4d7b5c938f77ad5cff5 /drivers/gpu/nvgpu/common | |
parent | b7cc3a2aa6c92a09eed43513287c9062f22ad127 (diff) |
gpu: nvgpu: move vgpu code to linux
Most of the VGPU code is Linux-specific but lies in common code.
So until the VGPU code is properly abstracted and made OS-independent,
move all of the VGPU code to a Linux-specific directory.
Handle corresponding Makefile changes
Update all #includes to reflect new paths
Add GPL license to newly added linux files
Jira NVGPU-387
Change-Id: Ic133e4c80e570bcc273f0dacf45283fefd678923
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1599472
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common')
44 files changed, 8022 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/ce2_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/ce2_vgpu.c new file mode 100644 index 00000000..ffb85f16 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/ce2_vgpu.c | |||
@@ -0,0 +1,45 @@ | |||
1 | /* | ||
2 | * Virtualized GPU CE2 | ||
3 | * | ||
4 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include "vgpu.h" | ||
20 | |||
21 | #include <nvgpu/bug.h> | ||
22 | |||
23 | int vgpu_ce2_nonstall_isr(struct gk20a *g, | ||
24 | struct tegra_vgpu_ce2_nonstall_intr_info *info) | ||
25 | { | ||
26 | gk20a_dbg_fn(""); | ||
27 | |||
28 | switch (info->type) { | ||
29 | case TEGRA_VGPU_CE2_NONSTALL_INTR_NONBLOCKPIPE: | ||
30 | gk20a_channel_semaphore_wakeup(g, true); | ||
31 | break; | ||
32 | default: | ||
33 | WARN_ON(1); | ||
34 | break; | ||
35 | } | ||
36 | |||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | u32 vgpu_ce_get_num_pce(struct gk20a *g) | ||
41 | { | ||
42 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
43 | |||
44 | return priv->constants.num_pce; | ||
45 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c new file mode 100644 index 00000000..bcdf8ee9 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c | |||
@@ -0,0 +1,164 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Clock Interface | ||
3 | * | ||
4 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include "vgpu.h" | ||
20 | #include "clk_vgpu.h" | ||
21 | |||
22 | static unsigned long | ||
23 | vgpu_freq_table[TEGRA_VGPU_GPU_FREQ_TABLE_SIZE]; | ||
24 | |||
25 | static unsigned long vgpu_clk_get_rate(struct gk20a *g, u32 api_domain) | ||
26 | { | ||
27 | struct tegra_vgpu_cmd_msg msg = {}; | ||
28 | struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; | ||
29 | int err; | ||
30 | unsigned long ret = 0; | ||
31 | |||
32 | gk20a_dbg_fn(""); | ||
33 | |||
34 | switch (api_domain) { | ||
35 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
36 | msg.cmd = TEGRA_VGPU_CMD_GET_GPU_CLK_RATE; | ||
37 | msg.handle = vgpu_get_handle(g); | ||
38 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
39 | err = err ? err : msg.ret; | ||
40 | if (err) | ||
41 | nvgpu_err(g, "%s failed - %d", __func__, err); | ||
42 | else | ||
43 | /* return frequency in Hz */ | ||
44 | ret = p->rate * 1000; | ||
45 | break; | ||
46 | case CTRL_CLK_DOMAIN_PWRCLK: | ||
47 | nvgpu_err(g, "unsupported clock: %u", api_domain); | ||
48 | break; | ||
49 | default: | ||
50 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
51 | break; | ||
52 | } | ||
53 | |||
54 | return ret; | ||
55 | } | ||
56 | |||
57 | static int vgpu_clk_set_rate(struct gk20a *g, | ||
58 | u32 api_domain, unsigned long rate) | ||
59 | { | ||
60 | struct tegra_vgpu_cmd_msg msg = {}; | ||
61 | struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; | ||
62 | int err = -EINVAL; | ||
63 | |||
64 | gk20a_dbg_fn(""); | ||
65 | |||
66 | switch (api_domain) { | ||
67 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
68 | msg.cmd = TEGRA_VGPU_CMD_SET_GPU_CLK_RATE; | ||
69 | msg.handle = vgpu_get_handle(g); | ||
70 | |||
71 | /* server dvfs framework requires frequency in kHz */ | ||
72 | p->rate = (u32)(rate / 1000); | ||
73 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
74 | err = err ? err : msg.ret; | ||
75 | if (err) | ||
76 | nvgpu_err(g, "%s failed - %d", __func__, err); | ||
77 | break; | ||
78 | case CTRL_CLK_DOMAIN_PWRCLK: | ||
79 | nvgpu_err(g, "unsupported clock: %u", api_domain); | ||
80 | break; | ||
81 | default: | ||
82 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
83 | break; | ||
84 | } | ||
85 | |||
86 | return err; | ||
87 | } | ||
88 | |||
89 | static unsigned long vgpu_clk_get_maxrate(struct gk20a *g, u32 api_domain) | ||
90 | { | ||
91 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
92 | |||
93 | return priv->constants.max_freq; | ||
94 | } | ||
95 | |||
96 | void vgpu_init_clk_support(struct gk20a *g) | ||
97 | { | ||
98 | g->ops.clk.get_rate = vgpu_clk_get_rate; | ||
99 | g->ops.clk.set_rate = vgpu_clk_set_rate; | ||
100 | g->ops.clk.get_maxrate = vgpu_clk_get_maxrate; | ||
101 | } | ||
102 | |||
/*
 * Round a requested rate to a supported one. The RM server performs
 * the actual rounding, so the request is passed through unchanged.
 */
long vgpu_clk_round_rate(struct device *dev, unsigned long rate)
{
	return rate;
}
108 | |||
109 | int vgpu_clk_get_freqs(struct device *dev, | ||
110 | unsigned long **freqs, int *num_freqs) | ||
111 | { | ||
112 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
113 | struct gk20a *g = platform->g; | ||
114 | struct tegra_vgpu_cmd_msg msg = {}; | ||
115 | struct tegra_vgpu_get_gpu_freq_table_params *p = | ||
116 | &msg.params.get_gpu_freq_table; | ||
117 | unsigned int i; | ||
118 | int err; | ||
119 | |||
120 | gk20a_dbg_fn(""); | ||
121 | |||
122 | msg.cmd = TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE; | ||
123 | msg.handle = vgpu_get_handle(g); | ||
124 | |||
125 | p->num_freqs = TEGRA_VGPU_GPU_FREQ_TABLE_SIZE; | ||
126 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
127 | err = err ? err : msg.ret; | ||
128 | if (err) { | ||
129 | nvgpu_err(g, "%s failed - %d", __func__, err); | ||
130 | return err; | ||
131 | } | ||
132 | |||
133 | /* return frequency in Hz */ | ||
134 | for (i = 0; i < p->num_freqs; i++) | ||
135 | vgpu_freq_table[i] = p->freqs[i] * 1000; | ||
136 | |||
137 | *freqs = vgpu_freq_table; | ||
138 | *num_freqs = p->num_freqs; | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | int vgpu_clk_cap_rate(struct device *dev, unsigned long rate) | ||
144 | { | ||
145 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
146 | struct gk20a *g = platform->g; | ||
147 | struct tegra_vgpu_cmd_msg msg = {}; | ||
148 | struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; | ||
149 | int err = 0; | ||
150 | |||
151 | gk20a_dbg_fn(""); | ||
152 | |||
153 | msg.cmd = TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE; | ||
154 | msg.handle = vgpu_get_handle(g); | ||
155 | p->rate = (u32)rate; | ||
156 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
157 | err = err ? err : msg.ret; | ||
158 | if (err) { | ||
159 | nvgpu_err(g, "%s failed - %d", __func__, err); | ||
160 | return err; | ||
161 | } | ||
162 | |||
163 | return 0; | ||
164 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h new file mode 100644 index 00000000..8d477643 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Clock Interface | ||
3 | * | ||
4 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
/*
 * Guard renamed: the old name _CLK_VIRT_H_ used a reserved identifier
 * (leading underscore + uppercase) and did not match the file name.
 */
#ifndef NVGPU_CLK_VGPU_H
#define NVGPU_CLK_VGPU_H

struct gk20a;
struct device;

/* Install the vgpu clock HAL callbacks on g->ops.clk. */
void vgpu_init_clk_support(struct gk20a *g);
/* The RM server does the rounding; returns @rate unchanged. */
long vgpu_clk_round_rate(struct device *dev, unsigned long rate);
/* Fetch the GPU frequency table (Hz) from the RM server. */
int vgpu_clk_get_freqs(struct device *dev,
		unsigned long **freqs, int *num_freqs);
/* Cap the GPU clock rate via the RM server. */
int vgpu_clk_cap_rate(struct device *dev, unsigned long rate);
#endif /* NVGPU_CLK_VGPU_H */
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/css_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/css_vgpu.c new file mode 100644 index 00000000..fba3cc63 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/css_vgpu.c | |||
@@ -0,0 +1,234 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
17 | |||
18 | #include <linux/tegra-ivc.h> | ||
19 | #include <linux/tegra_vgpu.h> | ||
20 | #include <uapi/linux/nvgpu.h> | ||
21 | |||
22 | #include "gk20a/gk20a.h" | ||
23 | #include "gk20a/channel_gk20a.h" | ||
24 | #include "gk20a/css_gr_gk20a.h" | ||
25 | #include "common/linux/platform_gk20a.h" | ||
26 | #include "common/linux/vgpu/vgpu.h" | ||
27 | #include "common/linux/vgpu/css_vgpu.h" | ||
28 | |||
29 | static struct tegra_hv_ivm_cookie *css_cookie; | ||
30 | |||
31 | static struct tegra_hv_ivm_cookie *vgpu_css_reserve_mempool(struct gk20a *g) | ||
32 | { | ||
33 | struct device *dev = dev_from_gk20a(g); | ||
34 | struct device_node *np = dev->of_node; | ||
35 | struct of_phandle_args args; | ||
36 | struct device_node *hv_np; | ||
37 | struct tegra_hv_ivm_cookie *cookie; | ||
38 | u32 mempool; | ||
39 | int err; | ||
40 | |||
41 | err = of_parse_phandle_with_fixed_args(np, | ||
42 | "mempool-css", 1, 0, &args); | ||
43 | if (err) { | ||
44 | nvgpu_err(g, "dt missing mempool-css"); | ||
45 | return ERR_PTR(err); | ||
46 | } | ||
47 | |||
48 | hv_np = args.np; | ||
49 | mempool = args.args[0]; | ||
50 | cookie = tegra_hv_mempool_reserve(hv_np, mempool); | ||
51 | if (IS_ERR_OR_NULL(cookie)) { | ||
52 | nvgpu_err(g, "mempool %u reserve failed", mempool); | ||
53 | return ERR_PTR(-EINVAL); | ||
54 | } | ||
55 | return cookie; | ||
56 | } | ||
57 | |||
58 | u32 vgpu_css_get_buffer_size(struct gk20a *g) | ||
59 | { | ||
60 | struct tegra_hv_ivm_cookie *cookie; | ||
61 | u32 size; | ||
62 | |||
63 | nvgpu_log_fn(g, " "); | ||
64 | |||
65 | if (css_cookie) { | ||
66 | nvgpu_log_info(g, "buffer size = %llu", css_cookie->size); | ||
67 | return (u32)css_cookie->size; | ||
68 | } | ||
69 | |||
70 | cookie = vgpu_css_reserve_mempool(g); | ||
71 | if (IS_ERR(cookie)) | ||
72 | return 0; | ||
73 | |||
74 | size = cookie->size; | ||
75 | |||
76 | tegra_hv_mempool_unreserve(cookie); | ||
77 | nvgpu_log_info(g, "buffer size = %u", size); | ||
78 | return size; | ||
79 | } | ||
80 | |||
/*
 * Reserve the cyclestats IVM mempool and map it as the HW snapshot
 * buffer. Idempotent: returns 0 immediately if already mapped.
 * On failure the mempool reservation is released and the global
 * css_cookie is reset to NULL.
 */
static int vgpu_css_init_snapshot_buffer(struct gr_gk20a *gr)
{
	struct gk20a *g = gr->g;
	struct gk20a_cs_snapshot *data = gr->cs_data;
	void *buf = NULL;
	int err;

	gk20a_dbg_fn("");

	/* already initialized */
	if (data->hw_snapshot)
		return 0;

	css_cookie = vgpu_css_reserve_mempool(g);
	if (IS_ERR(css_cookie))
		return PTR_ERR(css_cookie);

	/* Make sure buffer size is large enough */
	if (css_cookie->size < CSS_MIN_HW_SNAPSHOT_SIZE) {
		nvgpu_info(g, "mempool size %lld too small",
			css_cookie->size);
		err = -ENOMEM;
		goto fail;
	}

	buf = ioremap_cache(css_cookie->ipa, css_cookie->size);
	if (!buf) {
		nvgpu_info(g, "ioremap_cache failed");
		err = -EINVAL;
		goto fail;
	}

	data->hw_snapshot = buf;
	/* hw_end is one past the last whole fifo entry in the buffer */
	data->hw_end = data->hw_snapshot +
		css_cookie->size / sizeof(struct gk20a_cs_snapshot_fifo_entry);
	data->hw_get = data->hw_snapshot;
	/* poison the buffer so stale entries are recognizable */
	memset(data->hw_snapshot, 0xff, css_cookie->size);
	return 0;
fail:
	tegra_hv_mempool_unreserve(css_cookie);
	css_cookie = NULL;
	return err;
}
123 | |||
124 | void vgpu_css_release_snapshot_buffer(struct gr_gk20a *gr) | ||
125 | { | ||
126 | struct gk20a_cs_snapshot *data = gr->cs_data; | ||
127 | |||
128 | if (!data->hw_snapshot) | ||
129 | return; | ||
130 | |||
131 | iounmap(data->hw_snapshot); | ||
132 | data->hw_snapshot = NULL; | ||
133 | |||
134 | tegra_hv_mempool_unreserve(css_cookie); | ||
135 | css_cookie = NULL; | ||
136 | |||
137 | gk20a_dbg_info("cyclestats(vgpu): buffer for snapshots released\n"); | ||
138 | } | ||
139 | |||
140 | int vgpu_css_flush_snapshots(struct channel_gk20a *ch, | ||
141 | u32 *pending, bool *hw_overflow) | ||
142 | { | ||
143 | struct gk20a *g = ch->g; | ||
144 | struct tegra_vgpu_cmd_msg msg = {}; | ||
145 | struct tegra_vgpu_channel_cyclestats_snapshot_params *p; | ||
146 | struct gr_gk20a *gr = &g->gr; | ||
147 | struct gk20a_cs_snapshot *data = gr->cs_data; | ||
148 | int err; | ||
149 | |||
150 | gk20a_dbg_fn(""); | ||
151 | |||
152 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT; | ||
153 | msg.handle = vgpu_get_handle(g); | ||
154 | p = &msg.params.cyclestats_snapshot; | ||
155 | p->handle = ch->virt_ctx; | ||
156 | p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH; | ||
157 | p->buf_info = (uintptr_t)data->hw_get - (uintptr_t)data->hw_snapshot; | ||
158 | |||
159 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
160 | |||
161 | err = (err || msg.ret) ? -1 : 0; | ||
162 | |||
163 | *pending = p->buf_info; | ||
164 | *hw_overflow = p->hw_overflow; | ||
165 | |||
166 | return err; | ||
167 | } | ||
168 | |||
169 | static int vgpu_css_attach(struct channel_gk20a *ch, | ||
170 | struct gk20a_cs_snapshot_client *cs_client) | ||
171 | { | ||
172 | struct gk20a *g = ch->g; | ||
173 | struct tegra_vgpu_cmd_msg msg = {}; | ||
174 | struct tegra_vgpu_channel_cyclestats_snapshot_params *p = | ||
175 | &msg.params.cyclestats_snapshot; | ||
176 | int err; | ||
177 | |||
178 | gk20a_dbg_fn(""); | ||
179 | |||
180 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT; | ||
181 | msg.handle = vgpu_get_handle(g); | ||
182 | p->handle = ch->virt_ctx; | ||
183 | p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH; | ||
184 | p->perfmon_count = cs_client->perfmon_count; | ||
185 | |||
186 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
187 | err = err ? err : msg.ret; | ||
188 | if (err) | ||
189 | nvgpu_err(g, "failed"); | ||
190 | else | ||
191 | cs_client->perfmon_start = p->perfmon_start; | ||
192 | |||
193 | return err; | ||
194 | } | ||
195 | |||
196 | int vgpu_css_detach(struct channel_gk20a *ch, | ||
197 | struct gk20a_cs_snapshot_client *cs_client) | ||
198 | { | ||
199 | struct gk20a *g = ch->g; | ||
200 | struct tegra_vgpu_cmd_msg msg = {}; | ||
201 | struct tegra_vgpu_channel_cyclestats_snapshot_params *p = | ||
202 | &msg.params.cyclestats_snapshot; | ||
203 | int err; | ||
204 | |||
205 | gk20a_dbg_fn(""); | ||
206 | |||
207 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT; | ||
208 | msg.handle = vgpu_get_handle(g); | ||
209 | p->handle = ch->virt_ctx; | ||
210 | p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH; | ||
211 | p->perfmon_start = cs_client->perfmon_start; | ||
212 | p->perfmon_count = cs_client->perfmon_count; | ||
213 | |||
214 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
215 | err = err ? err : msg.ret; | ||
216 | if (err) | ||
217 | nvgpu_err(g, "failed"); | ||
218 | |||
219 | return err; | ||
220 | } | ||
221 | |||
222 | int vgpu_css_enable_snapshot_buffer(struct channel_gk20a *ch, | ||
223 | struct gk20a_cs_snapshot_client *cs_client) | ||
224 | { | ||
225 | int ret; | ||
226 | |||
227 | ret = vgpu_css_attach(ch, cs_client); | ||
228 | if (ret) | ||
229 | return ret; | ||
230 | |||
231 | ret = vgpu_css_init_snapshot_buffer(&ch->g->gr); | ||
232 | return ret; | ||
233 | } | ||
234 | #endif /* CONFIG_GK20A_CYCLE_STATS */ | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/css_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/css_vgpu.h new file mode 100644 index 00000000..df95e775 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/css_vgpu.h | |||
@@ -0,0 +1,34 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef _CSS_VGPU_H_ | ||
18 | #define _CSS_VGPU_H_ | ||
19 | |||
20 | #include <nvgpu/types.h> | ||
21 | |||
22 | struct gr_gk20a; | ||
23 | struct channel_gk20a; | ||
24 | struct gk20a_cs_snapshot_client; | ||
25 | |||
26 | void vgpu_css_release_snapshot_buffer(struct gr_gk20a *gr); | ||
27 | int vgpu_css_flush_snapshots(struct channel_gk20a *ch, | ||
28 | u32 *pending, bool *hw_overflow); | ||
29 | int vgpu_css_detach(struct channel_gk20a *ch, | ||
30 | struct gk20a_cs_snapshot_client *cs_client); | ||
31 | int vgpu_css_enable_snapshot_buffer(struct channel_gk20a *ch, | ||
32 | struct gk20a_cs_snapshot_client *cs_client); | ||
33 | u32 vgpu_css_get_buffer_size(struct gk20a *g); | ||
34 | #endif | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/dbg_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/dbg_vgpu.c new file mode 100644 index 00000000..06ef43b8 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/dbg_vgpu.c | |||
@@ -0,0 +1,210 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/tegra_gr_comm.h> | ||
18 | #include <linux/tegra_vgpu.h> | ||
19 | #include <uapi/linux/nvgpu.h> | ||
20 | |||
21 | #include "gk20a/gk20a.h" | ||
22 | #include "gk20a/channel_gk20a.h" | ||
23 | #include "gk20a/dbg_gpu_gk20a.h" | ||
24 | #include "vgpu.h" | ||
25 | #include "dbg_vgpu.h" | ||
26 | |||
27 | #include <nvgpu/bug.h> | ||
28 | |||
29 | int vgpu_exec_regops(struct dbg_session_gk20a *dbg_s, | ||
30 | struct nvgpu_dbg_gpu_reg_op *ops, | ||
31 | u64 num_ops) | ||
32 | { | ||
33 | struct channel_gk20a *ch; | ||
34 | struct tegra_vgpu_cmd_msg msg; | ||
35 | struct tegra_vgpu_reg_ops_params *p = &msg.params.reg_ops; | ||
36 | void *oob; | ||
37 | size_t oob_size, ops_size; | ||
38 | void *handle = NULL; | ||
39 | int err = 0; | ||
40 | |||
41 | gk20a_dbg_fn(""); | ||
42 | BUG_ON(sizeof(*ops) != sizeof(struct tegra_vgpu_reg_op)); | ||
43 | |||
44 | handle = tegra_gr_comm_oob_get_ptr(TEGRA_GR_COMM_CTX_CLIENT, | ||
45 | tegra_gr_comm_get_server_vmid(), | ||
46 | TEGRA_VGPU_QUEUE_CMD, | ||
47 | &oob, &oob_size); | ||
48 | if (!handle) | ||
49 | return -EINVAL; | ||
50 | |||
51 | ops_size = sizeof(*ops) * num_ops; | ||
52 | if (oob_size < ops_size) { | ||
53 | err = -ENOMEM; | ||
54 | goto fail; | ||
55 | } | ||
56 | |||
57 | memcpy(oob, ops, ops_size); | ||
58 | |||
59 | msg.cmd = TEGRA_VGPU_CMD_REG_OPS; | ||
60 | msg.handle = vgpu_get_handle(dbg_s->g); | ||
61 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
62 | p->handle = ch ? ch->virt_ctx : 0; | ||
63 | p->num_ops = num_ops; | ||
64 | p->is_profiler = dbg_s->is_profiler; | ||
65 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
66 | err = err ? err : msg.ret; | ||
67 | if (!err) | ||
68 | memcpy(ops, oob, ops_size); | ||
69 | |||
70 | fail: | ||
71 | tegra_gr_comm_oob_put_ptr(handle); | ||
72 | return err; | ||
73 | } | ||
74 | |||
75 | int vgpu_dbg_set_powergate(struct dbg_session_gk20a *dbg_s, bool disable_powergate) | ||
76 | { | ||
77 | struct tegra_vgpu_cmd_msg msg; | ||
78 | struct tegra_vgpu_set_powergate_params *p = &msg.params.set_powergate; | ||
79 | int err = 0; | ||
80 | u32 mode; | ||
81 | |||
82 | gk20a_dbg_fn(""); | ||
83 | |||
84 | /* Just return if requested mode is the same as the session's mode */ | ||
85 | if (disable_powergate) { | ||
86 | if (dbg_s->is_pg_disabled) | ||
87 | return 0; | ||
88 | dbg_s->is_pg_disabled = true; | ||
89 | mode = NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE; | ||
90 | } else { | ||
91 | if (!dbg_s->is_pg_disabled) | ||
92 | return 0; | ||
93 | dbg_s->is_pg_disabled = false; | ||
94 | mode = NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE; | ||
95 | } | ||
96 | |||
97 | msg.cmd = TEGRA_VGPU_CMD_SET_POWERGATE; | ||
98 | msg.handle = vgpu_get_handle(dbg_s->g); | ||
99 | p->mode = mode; | ||
100 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
101 | err = err ? err : msg.ret; | ||
102 | return err; | ||
103 | } | ||
104 | |||
105 | static int vgpu_sendrecv_prof_cmd(struct dbg_session_gk20a *dbg_s, u32 mode) | ||
106 | { | ||
107 | struct tegra_vgpu_cmd_msg msg; | ||
108 | struct tegra_vgpu_prof_mgt_params *p = &msg.params.prof_management; | ||
109 | int err = 0; | ||
110 | |||
111 | msg.cmd = TEGRA_VGPU_CMD_PROF_MGT; | ||
112 | msg.handle = vgpu_get_handle(dbg_s->g); | ||
113 | |||
114 | p->mode = mode; | ||
115 | |||
116 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
117 | err = err ? err : msg.ret; | ||
118 | return err; | ||
119 | } | ||
120 | |||
121 | bool vgpu_check_and_set_global_reservation( | ||
122 | struct dbg_session_gk20a *dbg_s, | ||
123 | struct dbg_profiler_object_data *prof_obj) | ||
124 | { | ||
125 | struct gk20a *g = dbg_s->g; | ||
126 | |||
127 | if (g->profiler_reservation_count > 0) | ||
128 | return false; | ||
129 | |||
130 | /* Check that another guest OS doesn't already have a reservation */ | ||
131 | if (!vgpu_sendrecv_prof_cmd(dbg_s, TEGRA_VGPU_PROF_GET_GLOBAL)) { | ||
132 | g->global_profiler_reservation_held = true; | ||
133 | g->profiler_reservation_count = 1; | ||
134 | dbg_s->has_profiler_reservation = true; | ||
135 | prof_obj->has_reservation = true; | ||
136 | return true; | ||
137 | } | ||
138 | return false; | ||
139 | } | ||
140 | |||
141 | bool vgpu_check_and_set_context_reservation( | ||
142 | struct dbg_session_gk20a *dbg_s, | ||
143 | struct dbg_profiler_object_data *prof_obj) | ||
144 | { | ||
145 | struct gk20a *g = dbg_s->g; | ||
146 | |||
147 | /* Assumes that we've already checked that no global reservation | ||
148 | * is in effect for this guest. | ||
149 | * | ||
150 | * If our reservation count is non-zero, then no other guest has the | ||
151 | * global reservation; if it is zero, need to check with RM server. | ||
152 | * | ||
153 | */ | ||
154 | if ((g->profiler_reservation_count != 0) || | ||
155 | !vgpu_sendrecv_prof_cmd(dbg_s, TEGRA_VGPU_PROF_GET_CONTEXT)) { | ||
156 | g->profiler_reservation_count++; | ||
157 | dbg_s->has_profiler_reservation = true; | ||
158 | prof_obj->has_reservation = true; | ||
159 | return true; | ||
160 | } | ||
161 | return false; | ||
162 | } | ||
163 | |||
164 | void vgpu_release_profiler_reservation( | ||
165 | struct dbg_session_gk20a *dbg_s, | ||
166 | struct dbg_profiler_object_data *prof_obj) | ||
167 | { | ||
168 | struct gk20a *g = dbg_s->g; | ||
169 | |||
170 | dbg_s->has_profiler_reservation = false; | ||
171 | prof_obj->has_reservation = false; | ||
172 | if (prof_obj->ch == NULL) | ||
173 | g->global_profiler_reservation_held = false; | ||
174 | |||
175 | /* If new reservation count is zero, notify server */ | ||
176 | g->profiler_reservation_count--; | ||
177 | if (g->profiler_reservation_count == 0) | ||
178 | vgpu_sendrecv_prof_cmd(dbg_s, TEGRA_VGPU_PROF_RELEASE); | ||
179 | } | ||
180 | |||
181 | static int vgpu_sendrecv_perfbuf_cmd(struct gk20a *g, u64 offset, u32 size) | ||
182 | { | ||
183 | struct mm_gk20a *mm = &g->mm; | ||
184 | struct vm_gk20a *vm = mm->perfbuf.vm; | ||
185 | struct tegra_vgpu_cmd_msg msg; | ||
186 | struct tegra_vgpu_perfbuf_mgt_params *p = | ||
187 | &msg.params.perfbuf_management; | ||
188 | int err; | ||
189 | |||
190 | msg.cmd = TEGRA_VGPU_CMD_PERFBUF_MGT; | ||
191 | msg.handle = vgpu_get_handle(g); | ||
192 | |||
193 | p->vm_handle = vm->handle; | ||
194 | p->offset = offset; | ||
195 | p->size = size; | ||
196 | |||
197 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
198 | err = err ? err : msg.ret; | ||
199 | return err; | ||
200 | } | ||
201 | |||
202 | int vgpu_perfbuffer_enable(struct gk20a *g, u64 offset, u32 size) | ||
203 | { | ||
204 | return vgpu_sendrecv_perfbuf_cmd(g, offset, size); | ||
205 | } | ||
206 | |||
/* Disable the perf buffer (zero offset/size signals teardown). */
int vgpu_perfbuffer_disable(struct gk20a *g)
{
	return vgpu_sendrecv_perfbuf_cmd(g, 0, 0);
}
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/dbg_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/dbg_vgpu.h new file mode 100644 index 00000000..8552a82e --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/dbg_vgpu.h | |||
@@ -0,0 +1,41 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef _DBG_VGPU_H_ | ||
18 | #define _DBG_VGPU_H_ | ||
19 | |||
20 | struct dbg_session_gk20a; | ||
21 | struct nvgpu_dbg_gpu_reg_op; | ||
22 | struct dbg_profiler_object_data; | ||
23 | struct gk20a; | ||
24 | |||
25 | int vgpu_exec_regops(struct dbg_session_gk20a *dbg_s, | ||
26 | struct nvgpu_dbg_gpu_reg_op *ops, | ||
27 | u64 num_ops); | ||
28 | int vgpu_dbg_set_powergate(struct dbg_session_gk20a *dbg_s, bool disable_powergate); | ||
29 | bool vgpu_check_and_set_global_reservation( | ||
30 | struct dbg_session_gk20a *dbg_s, | ||
31 | struct dbg_profiler_object_data *prof_obj); | ||
32 | bool vgpu_check_and_set_context_reservation( | ||
33 | struct dbg_session_gk20a *dbg_s, | ||
34 | struct dbg_profiler_object_data *prof_obj); | ||
35 | |||
36 | void vgpu_release_profiler_reservation( | ||
37 | struct dbg_session_gk20a *dbg_s, | ||
38 | struct dbg_profiler_object_data *prof_obj); | ||
39 | int vgpu_perfbuffer_enable(struct gk20a *g, u64 offset, u32 size); | ||
40 | int vgpu_perfbuffer_disable(struct gk20a *g); | ||
41 | #endif | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c new file mode 100644 index 00000000..5007de36 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c | |||
@@ -0,0 +1,225 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/string.h> | ||
18 | #include <linux/tegra-ivc.h> | ||
19 | #include <linux/tegra_vgpu.h> | ||
20 | |||
21 | #include <uapi/linux/nvgpu.h> | ||
22 | |||
23 | #include <nvgpu/kmem.h> | ||
24 | #include <nvgpu/bug.h> | ||
25 | #include <nvgpu/enabled.h> | ||
26 | #include <nvgpu/ctxsw_trace.h> | ||
27 | |||
28 | #include "gk20a/gk20a.h" | ||
29 | #include "vgpu.h" | ||
30 | #include "fecs_trace_vgpu.h" | ||
31 | |||
/*
 * Per-GPU state for virtualized FECS context-switch tracing.
 * The trace ring lives in a hypervisor-shared mempool; the server
 * produces entries and the guest consumes them through this mapping.
 */
struct vgpu_fecs_trace {
	struct tegra_hv_ivm_cookie *cookie;	/* reserved mempool handle */
	struct nvgpu_ctxsw_ring_header *header;	/* ring header at start of buf */
	struct nvgpu_ctxsw_trace_entry *entries; /* entry array after header */
	int num_entries;			/* copied from header->num_ents */
	bool enabled;				/* tracing currently on? */
	void *buf;				/* ioremap'd view of the mempool */
};
40 | |||
41 | int vgpu_fecs_trace_init(struct gk20a *g) | ||
42 | { | ||
43 | struct device *dev = dev_from_gk20a(g); | ||
44 | struct device_node *np = dev->of_node; | ||
45 | struct of_phandle_args args; | ||
46 | struct device_node *hv_np; | ||
47 | struct vgpu_fecs_trace *vcst; | ||
48 | u32 mempool; | ||
49 | int err; | ||
50 | |||
51 | gk20a_dbg_fn(""); | ||
52 | |||
53 | vcst = nvgpu_kzalloc(g, sizeof(*vcst)); | ||
54 | if (!vcst) | ||
55 | return -ENOMEM; | ||
56 | |||
57 | err = of_parse_phandle_with_fixed_args(np, | ||
58 | "mempool-fecs-trace", 1, 0, &args); | ||
59 | if (err) { | ||
60 | dev_info(dev_from_gk20a(g), "does not support fecs trace\n"); | ||
61 | goto fail; | ||
62 | } | ||
63 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); | ||
64 | |||
65 | hv_np = args.np; | ||
66 | mempool = args.args[0]; | ||
67 | vcst->cookie = tegra_hv_mempool_reserve(hv_np, mempool); | ||
68 | if (IS_ERR(vcst->cookie)) { | ||
69 | dev_info(dev_from_gk20a(g), | ||
70 | "mempool %u reserve failed\n", mempool); | ||
71 | vcst->cookie = NULL; | ||
72 | err = -EINVAL; | ||
73 | goto fail; | ||
74 | } | ||
75 | |||
76 | vcst->buf = ioremap_cache(vcst->cookie->ipa, vcst->cookie->size); | ||
77 | if (!vcst->buf) { | ||
78 | dev_info(dev_from_gk20a(g), "ioremap_cache failed\n"); | ||
79 | err = -EINVAL; | ||
80 | goto fail; | ||
81 | } | ||
82 | vcst->header = vcst->buf; | ||
83 | vcst->num_entries = vcst->header->num_ents; | ||
84 | if (unlikely(vcst->header->ent_size != sizeof(*vcst->entries))) { | ||
85 | dev_err(dev_from_gk20a(g), | ||
86 | "entry size mismatch\n"); | ||
87 | goto fail; | ||
88 | } | ||
89 | vcst->entries = vcst->buf + sizeof(*vcst->header); | ||
90 | g->fecs_trace = (struct gk20a_fecs_trace *)vcst; | ||
91 | |||
92 | return 0; | ||
93 | fail: | ||
94 | iounmap(vcst->buf); | ||
95 | if (vcst->cookie) | ||
96 | tegra_hv_mempool_unreserve(vcst->cookie); | ||
97 | nvgpu_kfree(g, vcst); | ||
98 | return err; | ||
99 | } | ||
100 | |||
101 | int vgpu_fecs_trace_deinit(struct gk20a *g) | ||
102 | { | ||
103 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
104 | |||
105 | iounmap(vcst->buf); | ||
106 | tegra_hv_mempool_unreserve(vcst->cookie); | ||
107 | nvgpu_kfree(g, vcst); | ||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | int vgpu_fecs_trace_enable(struct gk20a *g) | ||
112 | { | ||
113 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
114 | struct tegra_vgpu_cmd_msg msg = { | ||
115 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE, | ||
116 | .handle = vgpu_get_handle(g), | ||
117 | }; | ||
118 | int err; | ||
119 | |||
120 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
121 | err = err ? err : msg.ret; | ||
122 | WARN_ON(err); | ||
123 | vcst->enabled = !err; | ||
124 | return err; | ||
125 | } | ||
126 | |||
127 | int vgpu_fecs_trace_disable(struct gk20a *g) | ||
128 | { | ||
129 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
130 | struct tegra_vgpu_cmd_msg msg = { | ||
131 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE, | ||
132 | .handle = vgpu_get_handle(g), | ||
133 | }; | ||
134 | int err; | ||
135 | |||
136 | vcst->enabled = false; | ||
137 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
138 | err = err ? err : msg.ret; | ||
139 | WARN_ON(err); | ||
140 | return err; | ||
141 | } | ||
142 | |||
143 | bool vgpu_fecs_trace_is_enabled(struct gk20a *g) | ||
144 | { | ||
145 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
146 | |||
147 | return (vcst && vcst->enabled); | ||
148 | } | ||
149 | |||
150 | int vgpu_fecs_trace_poll(struct gk20a *g) | ||
151 | { | ||
152 | struct tegra_vgpu_cmd_msg msg = { | ||
153 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_POLL, | ||
154 | .handle = vgpu_get_handle(g), | ||
155 | }; | ||
156 | int err; | ||
157 | |||
158 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
159 | err = err ? err : msg.ret; | ||
160 | WARN_ON(err); | ||
161 | return err; | ||
162 | } | ||
163 | |||
164 | int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size) | ||
165 | { | ||
166 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
167 | |||
168 | *buf = vcst->buf; | ||
169 | *size = vcst->cookie->size; | ||
170 | return 0; | ||
171 | } | ||
172 | |||
/*
 * Counterpart of vgpu_alloc_user_buffer(): the buffer is owned by the
 * hypervisor mempool mapping, so there is nothing to free here.
 */
int vgpu_free_user_buffer(struct gk20a *g)
{
	return 0;
}
177 | |||
/*
 * Map the shared FECS trace mempool into a userspace VMA so readers can
 * consume trace entries directly. Backed by the mempool's physical
 * range (cookie->ipa).
 */
int vgpu_mmap_user_buffer(struct gk20a *g, struct vm_area_struct *vma)
{
	struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
	unsigned long size = vcst->cookie->size;
	unsigned long vsize = vma->vm_end - vma->vm_start;

	/* Map no more than the smaller of the pool and the VMA.
	 * NOTE(review): round_up after min() can exceed vsize when the VMA
	 * length is not page aligned — presumably VMAs here are always
	 * page-sized; confirm against callers. */
	size = min(size, vsize);
	size = round_up(size, PAGE_SIZE);

	return remap_pfn_range(vma, vma->vm_start,
			vcst->cookie->ipa >> PAGE_SHIFT,
			size,
			vma->vm_page_prot);
}
192 | |||
193 | int vgpu_fecs_trace_max_entries(struct gk20a *g, | ||
194 | struct nvgpu_ctxsw_trace_filter *filter) | ||
195 | { | ||
196 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
197 | |||
198 | return vcst->header->num_ents; | ||
199 | } | ||
200 | |||
201 | #if NVGPU_CTXSW_FILTER_SIZE != TEGRA_VGPU_FECS_TRACE_FILTER_SIZE | ||
202 | #error "FECS trace filter size mismatch!" | ||
203 | #endif | ||
204 | |||
205 | int vgpu_fecs_trace_set_filter(struct gk20a *g, | ||
206 | struct nvgpu_ctxsw_trace_filter *filter) | ||
207 | { | ||
208 | struct tegra_vgpu_cmd_msg msg = { | ||
209 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER, | ||
210 | .handle = vgpu_get_handle(g), | ||
211 | }; | ||
212 | struct tegra_vgpu_fecs_trace_filter *p = &msg.params.fecs_trace_filter; | ||
213 | int err; | ||
214 | |||
215 | memcpy(&p->tag_bits, &filter->tag_bits, sizeof(p->tag_bits)); | ||
216 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
217 | err = err ? err : msg.ret; | ||
218 | WARN_ON(err); | ||
219 | return err; | ||
220 | } | ||
221 | |||
/* Invoked when the server signals new trace data; wakes readers of the
 * ctxsw trace device (second argument presumably selects instance/vm 0
 * — confirm against gk20a_ctxsw_trace_wake_up). */
void vgpu_fecs_trace_data_update(struct gk20a *g)
{
	gk20a_ctxsw_trace_wake_up(g, 0);
}
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.h new file mode 100644 index 00000000..c375b841 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.h | |||
@@ -0,0 +1,41 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
#ifndef __FECS_TRACE_VGPU_H
#define __FECS_TRACE_VGPU_H

#include <nvgpu/types.h>

/* Forward declarations — pointers only are used in this interface. */
struct gk20a;
struct vm_area_struct;
struct nvgpu_ctxsw_trace_filter;

/* Wake readers when the server reports new trace data. */
void vgpu_fecs_trace_data_update(struct gk20a *g);
/* Reserve and map the shared trace mempool / tear it down again. */
int vgpu_fecs_trace_init(struct gk20a *g);
int vgpu_fecs_trace_deinit(struct gk20a *g);
/* Start/stop/query tracing on the server side. */
int vgpu_fecs_trace_enable(struct gk20a *g);
int vgpu_fecs_trace_disable(struct gk20a *g);
bool vgpu_fecs_trace_is_enabled(struct gk20a *g);
/* Ask the server to flush pending entries into the shared ring. */
int vgpu_fecs_trace_poll(struct gk20a *g);
/* User-visible buffer management (the shared mapping is reused). */
int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size);
int vgpu_free_user_buffer(struct gk20a *g);
int vgpu_mmap_user_buffer(struct gk20a *g, struct vm_area_struct *vma);
/* Ring capacity and tag filtering. */
int vgpu_fecs_trace_max_entries(struct gk20a *g,
		struct nvgpu_ctxsw_trace_filter *filter);
int vgpu_fecs_trace_set_filter(struct gk20a *g,
		struct nvgpu_ctxsw_trace_filter *filter);

#endif /* __FECS_TRACE_VGPU_H */
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c new file mode 100644 index 00000000..cdcecca5 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c | |||
@@ -0,0 +1,822 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Fifo | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/dma-mapping.h> | ||
20 | #include <trace/events/gk20a.h> | ||
21 | #include <uapi/linux/nvgpu.h> | ||
22 | |||
23 | #include <nvgpu/kmem.h> | ||
24 | #include <nvgpu/dma.h> | ||
25 | #include <nvgpu/atomic.h> | ||
26 | #include <nvgpu/bug.h> | ||
27 | #include <nvgpu/barrier.h> | ||
28 | |||
29 | #include "vgpu.h" | ||
30 | #include "fifo_vgpu.h" | ||
31 | |||
32 | #include <nvgpu/hw/gk20a/hw_fifo_gk20a.h> | ||
33 | #include <nvgpu/hw/gk20a/hw_ram_gk20a.h> | ||
34 | |||
35 | void vgpu_channel_bind(struct channel_gk20a *ch) | ||
36 | { | ||
37 | struct tegra_vgpu_cmd_msg msg; | ||
38 | struct tegra_vgpu_channel_config_params *p = | ||
39 | &msg.params.channel_config; | ||
40 | int err; | ||
41 | |||
42 | gk20a_dbg_info("bind channel %d", ch->chid); | ||
43 | |||
44 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND; | ||
45 | msg.handle = vgpu_get_handle(ch->g); | ||
46 | p->handle = ch->virt_ctx; | ||
47 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
48 | WARN_ON(err || msg.ret); | ||
49 | |||
50 | nvgpu_smp_wmb(); | ||
51 | nvgpu_atomic_set(&ch->bound, true); | ||
52 | } | ||
53 | |||
54 | void vgpu_channel_unbind(struct channel_gk20a *ch) | ||
55 | { | ||
56 | |||
57 | gk20a_dbg_fn(""); | ||
58 | |||
59 | if (nvgpu_atomic_cmpxchg(&ch->bound, true, false)) { | ||
60 | struct tegra_vgpu_cmd_msg msg; | ||
61 | struct tegra_vgpu_channel_config_params *p = | ||
62 | &msg.params.channel_config; | ||
63 | int err; | ||
64 | |||
65 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNBIND; | ||
66 | msg.handle = vgpu_get_handle(ch->g); | ||
67 | p->handle = ch->virt_ctx; | ||
68 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
69 | WARN_ON(err || msg.ret); | ||
70 | } | ||
71 | |||
72 | } | ||
73 | |||
74 | int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) | ||
75 | { | ||
76 | struct tegra_vgpu_cmd_msg msg; | ||
77 | struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx; | ||
78 | int err; | ||
79 | |||
80 | gk20a_dbg_fn(""); | ||
81 | |||
82 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_HWCTX; | ||
83 | msg.handle = vgpu_get_handle(g); | ||
84 | p->id = ch->chid; | ||
85 | p->pid = (u64)current->tgid; | ||
86 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
87 | if (err || msg.ret) { | ||
88 | nvgpu_err(g, "fail"); | ||
89 | return -ENOMEM; | ||
90 | } | ||
91 | |||
92 | ch->virt_ctx = p->handle; | ||
93 | gk20a_dbg_fn("done"); | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | void vgpu_channel_free_inst(struct gk20a *g, struct channel_gk20a *ch) | ||
98 | { | ||
99 | struct tegra_vgpu_cmd_msg msg; | ||
100 | struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx; | ||
101 | int err; | ||
102 | |||
103 | gk20a_dbg_fn(""); | ||
104 | |||
105 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWCTX; | ||
106 | msg.handle = vgpu_get_handle(g); | ||
107 | p->handle = ch->virt_ctx; | ||
108 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
109 | WARN_ON(err || msg.ret); | ||
110 | } | ||
111 | |||
112 | void vgpu_channel_enable(struct channel_gk20a *ch) | ||
113 | { | ||
114 | struct tegra_vgpu_cmd_msg msg; | ||
115 | struct tegra_vgpu_channel_config_params *p = | ||
116 | &msg.params.channel_config; | ||
117 | int err; | ||
118 | |||
119 | gk20a_dbg_fn(""); | ||
120 | |||
121 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ENABLE; | ||
122 | msg.handle = vgpu_get_handle(ch->g); | ||
123 | p->handle = ch->virt_ctx; | ||
124 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
125 | WARN_ON(err || msg.ret); | ||
126 | } | ||
127 | |||
128 | void vgpu_channel_disable(struct channel_gk20a *ch) | ||
129 | { | ||
130 | struct tegra_vgpu_cmd_msg msg; | ||
131 | struct tegra_vgpu_channel_config_params *p = | ||
132 | &msg.params.channel_config; | ||
133 | int err; | ||
134 | |||
135 | gk20a_dbg_fn(""); | ||
136 | |||
137 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_DISABLE; | ||
138 | msg.handle = vgpu_get_handle(ch->g); | ||
139 | p->handle = ch->virt_ctx; | ||
140 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
141 | WARN_ON(err || msg.ret); | ||
142 | } | ||
143 | |||
/*
 * Ask the vgpu server to set up the channel's RAMFC (FIFO context):
 * GPFIFO base/size, USERD address, and whether addresses are IOVAs.
 *
 * NOTE(review): acquire_timeout and flags are accepted but not
 * forwarded to the server — presumably unsupported on vGPU; confirm.
 * Returns 0 on success, -ENOMEM on any communication/server failure.
 */
int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base,
				u32 gpfifo_entries,
				unsigned long acquire_timeout, u32 flags)
{
	struct device __maybe_unused *d = dev_from_gk20a(ch->g);
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_ramfc_params *p = &msg.params.ramfc;
	int err;

	gk20a_dbg_fn("");

	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SETUP_RAMFC;
	msg.handle = vgpu_get_handle(ch->g);
	p->handle = ch->virt_ctx;
	p->gpfifo_va = gpfifo_base;
	p->num_entries = gpfifo_entries;
	p->userd_addr = ch->userd_iova;
	/* tell the server whether userd_addr is behind an IOMMU */
	p->iova = mapping ? 1 : 0;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

	return (err || msg.ret) ? -ENOMEM : 0;
}
167 | |||
168 | int vgpu_fifo_init_engine_info(struct fifo_gk20a *f) | ||
169 | { | ||
170 | struct vgpu_priv_data *priv = vgpu_get_priv_data(f->g); | ||
171 | struct tegra_vgpu_engines_info *engines = &priv->constants.engines_info; | ||
172 | u32 i; | ||
173 | |||
174 | gk20a_dbg_fn(""); | ||
175 | |||
176 | if (engines->num_engines > TEGRA_VGPU_MAX_ENGINES) { | ||
177 | nvgpu_err(f->g, "num_engines %d larger than max %d", | ||
178 | engines->num_engines, TEGRA_VGPU_MAX_ENGINES); | ||
179 | return -EINVAL; | ||
180 | } | ||
181 | |||
182 | f->num_engines = engines->num_engines; | ||
183 | for (i = 0; i < f->num_engines; i++) { | ||
184 | struct fifo_engine_info_gk20a *info = | ||
185 | &f->engine_info[engines->info[i].engine_id]; | ||
186 | |||
187 | if (engines->info[i].engine_id >= f->max_engines) { | ||
188 | nvgpu_err(f->g, "engine id %d larger than max %d", | ||
189 | engines->info[i].engine_id, | ||
190 | f->max_engines); | ||
191 | return -EINVAL; | ||
192 | } | ||
193 | |||
194 | info->intr_mask = engines->info[i].intr_mask; | ||
195 | info->reset_mask = engines->info[i].reset_mask; | ||
196 | info->runlist_id = engines->info[i].runlist_id; | ||
197 | info->pbdma_id = engines->info[i].pbdma_id; | ||
198 | info->inst_id = engines->info[i].inst_id; | ||
199 | info->pri_base = engines->info[i].pri_base; | ||
200 | info->engine_enum = engines->info[i].engine_enum; | ||
201 | info->fault_id = engines->info[i].fault_id; | ||
202 | f->active_engines_list[i] = engines->info[i].engine_id; | ||
203 | } | ||
204 | |||
205 | gk20a_dbg_fn("done"); | ||
206 | |||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | ||
211 | { | ||
212 | struct fifo_runlist_info_gk20a *runlist; | ||
213 | struct device *d = dev_from_gk20a(g); | ||
214 | unsigned int runlist_id = -1; | ||
215 | u32 i; | ||
216 | u64 runlist_size; | ||
217 | |||
218 | gk20a_dbg_fn(""); | ||
219 | |||
220 | f->max_runlists = g->ops.fifo.eng_runlist_base_size(); | ||
221 | f->runlist_info = nvgpu_kzalloc(g, | ||
222 | sizeof(struct fifo_runlist_info_gk20a) * | ||
223 | f->max_runlists); | ||
224 | if (!f->runlist_info) | ||
225 | goto clean_up_runlist; | ||
226 | |||
227 | memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) * | ||
228 | f->max_runlists)); | ||
229 | |||
230 | for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) { | ||
231 | runlist = &f->runlist_info[runlist_id]; | ||
232 | |||
233 | runlist->active_channels = | ||
234 | nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels, | ||
235 | BITS_PER_BYTE)); | ||
236 | if (!runlist->active_channels) | ||
237 | goto clean_up_runlist; | ||
238 | |||
239 | runlist_size = sizeof(u16) * f->num_channels; | ||
240 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | ||
241 | int err = nvgpu_dma_alloc_sys(g, runlist_size, | ||
242 | &runlist->mem[i]); | ||
243 | if (err) { | ||
244 | dev_err(d, "memory allocation failed\n"); | ||
245 | goto clean_up_runlist; | ||
246 | } | ||
247 | } | ||
248 | nvgpu_mutex_init(&runlist->mutex); | ||
249 | |||
250 | /* None of buffers is pinned if this value doesn't change. | ||
251 | Otherwise, one of them (cur_buffer) must have been pinned. */ | ||
252 | runlist->cur_buffer = MAX_RUNLIST_BUFFERS; | ||
253 | } | ||
254 | |||
255 | gk20a_dbg_fn("done"); | ||
256 | return 0; | ||
257 | |||
258 | clean_up_runlist: | ||
259 | gk20a_fifo_delete_runlist(f); | ||
260 | gk20a_dbg_fn("fail"); | ||
261 | return -ENOMEM; | ||
262 | } | ||
263 | |||
264 | static int vgpu_init_fifo_setup_sw(struct gk20a *g) | ||
265 | { | ||
266 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
267 | struct fifo_gk20a *f = &g->fifo; | ||
268 | struct device *d = dev_from_gk20a(g); | ||
269 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
270 | unsigned int chid; | ||
271 | int err = 0; | ||
272 | |||
273 | gk20a_dbg_fn(""); | ||
274 | |||
275 | if (f->sw_ready) { | ||
276 | gk20a_dbg_fn("skip init"); | ||
277 | return 0; | ||
278 | } | ||
279 | |||
280 | f->g = g; | ||
281 | f->num_channels = priv->constants.num_channels; | ||
282 | f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES); | ||
283 | |||
284 | f->userd_entry_size = 1 << ram_userd_base_shift_v(); | ||
285 | |||
286 | err = nvgpu_dma_alloc_sys(g, f->userd_entry_size * f->num_channels, | ||
287 | &f->userd); | ||
288 | if (err) { | ||
289 | dev_err(d, "memory allocation failed\n"); | ||
290 | goto clean_up; | ||
291 | } | ||
292 | |||
293 | /* bar1 va */ | ||
294 | if (g->ops.mm.is_bar1_supported(g)) { | ||
295 | f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.priv.sgt, | ||
296 | f->userd.size); | ||
297 | if (!f->userd.gpu_va) { | ||
298 | dev_err(d, "gmmu mapping failed\n"); | ||
299 | goto clean_up; | ||
300 | } | ||
301 | /* if reduced BAR1 range is specified, use offset of 0 | ||
302 | * (server returns offset assuming full BAR1 range) | ||
303 | */ | ||
304 | if (resource_size(l->bar1_mem) == | ||
305 | (resource_size_t)f->userd.size) | ||
306 | f->userd.gpu_va = 0; | ||
307 | } | ||
308 | |||
309 | gk20a_dbg(gpu_dbg_map_v, "userd bar1 va = 0x%llx", f->userd.gpu_va); | ||
310 | |||
311 | f->channel = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->channel)); | ||
312 | f->tsg = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->tsg)); | ||
313 | f->engine_info = nvgpu_kzalloc(g, f->max_engines * | ||
314 | sizeof(*f->engine_info)); | ||
315 | f->active_engines_list = nvgpu_kzalloc(g, f->max_engines * sizeof(u32)); | ||
316 | |||
317 | if (!(f->channel && f->tsg && f->engine_info && f->active_engines_list)) { | ||
318 | err = -ENOMEM; | ||
319 | goto clean_up; | ||
320 | } | ||
321 | memset(f->active_engines_list, 0xff, (f->max_engines * sizeof(u32))); | ||
322 | |||
323 | g->ops.fifo.init_engine_info(f); | ||
324 | |||
325 | init_runlist(g, f); | ||
326 | |||
327 | nvgpu_init_list_node(&f->free_chs); | ||
328 | nvgpu_mutex_init(&f->free_chs_mutex); | ||
329 | |||
330 | for (chid = 0; chid < f->num_channels; chid++) { | ||
331 | f->channel[chid].userd_iova = | ||
332 | nvgpu_mem_get_addr(g, &f->userd) + | ||
333 | chid * f->userd_entry_size; | ||
334 | f->channel[chid].userd_gpu_va = | ||
335 | f->userd.gpu_va + chid * f->userd_entry_size; | ||
336 | |||
337 | gk20a_init_channel_support(g, chid); | ||
338 | gk20a_init_tsg_support(g, chid); | ||
339 | } | ||
340 | nvgpu_mutex_init(&f->tsg_inuse_mutex); | ||
341 | |||
342 | err = nvgpu_channel_worker_init(g); | ||
343 | if (err) | ||
344 | goto clean_up; | ||
345 | |||
346 | f->deferred_reset_pending = false; | ||
347 | nvgpu_mutex_init(&f->deferred_reset_mutex); | ||
348 | |||
349 | f->channel_base = priv->constants.channel_base; | ||
350 | |||
351 | f->sw_ready = true; | ||
352 | |||
353 | gk20a_dbg_fn("done"); | ||
354 | return 0; | ||
355 | |||
356 | clean_up: | ||
357 | gk20a_dbg_fn("fail"); | ||
358 | /* FIXME: unmap from bar1 */ | ||
359 | nvgpu_dma_free(g, &f->userd); | ||
360 | |||
361 | memset(&f->userd, 0, sizeof(f->userd)); | ||
362 | |||
363 | nvgpu_vfree(g, f->channel); | ||
364 | f->channel = NULL; | ||
365 | nvgpu_vfree(g, f->tsg); | ||
366 | f->tsg = NULL; | ||
367 | nvgpu_kfree(g, f->engine_info); | ||
368 | f->engine_info = NULL; | ||
369 | nvgpu_kfree(g, f->active_engines_list); | ||
370 | f->active_engines_list = NULL; | ||
371 | |||
372 | return err; | ||
373 | } | ||
374 | |||
/*
 * Hardware-side FIFO setup for vGPU. The only work is a BAR1 sanity
 * test: write/read the USERD region through BAR1 and through the CPU
 * mapping, verifying both views agree, then restore the original value.
 * Returns 0 on success, -EINVAL if BAR1 accesses don't round-trip.
 */
int vgpu_init_fifo_setup_hw(struct gk20a *g)
{
	gk20a_dbg_fn("");

	/* test write, read through bar1 @ userd region before
	 * turning on the snooping */
	{
		struct fifo_gk20a *f = &g->fifo;
		u32 v, v1 = 0x33, v2 = 0x55;

		/* NOTE(review): gpu_va is 64-bit but stored into a u32
		 * here — presumably BAR1 offsets always fit; confirm. */
		u32 bar1_vaddr = f->userd.gpu_va;
		volatile u32 *cpu_vaddr = f->userd.cpu_va;

		gk20a_dbg_info("test bar1 @ vaddr 0x%x",
			   bar1_vaddr);

		/* remember original contents so we can restore them */
		v = gk20a_bar1_readl(g, bar1_vaddr);

		/* CPU write must be visible through BAR1 */
		*cpu_vaddr = v1;
		nvgpu_mb();

		if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
			nvgpu_err(g, "bar1 broken @ gk20a!");
			return -EINVAL;
		}

		/* BAR1 write must read back through BAR1 */
		gk20a_bar1_writel(g, bar1_vaddr, v2);

		if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
			nvgpu_err(g, "bar1 broken @ gk20a!");
			return -EINVAL;
		}

		/* is it visible to the cpu? (warn only, not fatal) */
		if (*cpu_vaddr != v2) {
			nvgpu_err(g, "cpu didn't see bar1 write @ %p!",
				cpu_vaddr);
		}

		/* put it back */
		gk20a_bar1_writel(g, bar1_vaddr, v);
	}

	gk20a_dbg_fn("done");

	return 0;
}
422 | |||
423 | int vgpu_init_fifo_support(struct gk20a *g) | ||
424 | { | ||
425 | u32 err; | ||
426 | |||
427 | gk20a_dbg_fn(""); | ||
428 | |||
429 | err = vgpu_init_fifo_setup_sw(g); | ||
430 | if (err) | ||
431 | return err; | ||
432 | |||
433 | if (g->ops.fifo.init_fifo_setup_hw) | ||
434 | err = g->ops.fifo.init_fifo_setup_hw(g); | ||
435 | return err; | ||
436 | } | ||
437 | |||
438 | int vgpu_fifo_preempt_channel(struct gk20a *g, u32 chid) | ||
439 | { | ||
440 | struct fifo_gk20a *f = &g->fifo; | ||
441 | struct channel_gk20a *ch = &f->channel[chid]; | ||
442 | struct tegra_vgpu_cmd_msg msg; | ||
443 | struct tegra_vgpu_channel_config_params *p = | ||
444 | &msg.params.channel_config; | ||
445 | int err; | ||
446 | |||
447 | gk20a_dbg_fn(""); | ||
448 | |||
449 | if (!nvgpu_atomic_read(&ch->bound)) | ||
450 | return 0; | ||
451 | |||
452 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT; | ||
453 | msg.handle = vgpu_get_handle(g); | ||
454 | p->handle = ch->virt_ctx; | ||
455 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
456 | |||
457 | if (err || msg.ret) { | ||
458 | nvgpu_err(g, | ||
459 | "preempt channel %d failed", chid); | ||
460 | err = -ENOMEM; | ||
461 | } | ||
462 | |||
463 | return err; | ||
464 | } | ||
465 | |||
466 | int vgpu_fifo_preempt_tsg(struct gk20a *g, u32 tsgid) | ||
467 | { | ||
468 | struct tegra_vgpu_cmd_msg msg; | ||
469 | struct tegra_vgpu_tsg_preempt_params *p = | ||
470 | &msg.params.tsg_preempt; | ||
471 | int err; | ||
472 | |||
473 | gk20a_dbg_fn(""); | ||
474 | |||
475 | msg.cmd = TEGRA_VGPU_CMD_TSG_PREEMPT; | ||
476 | msg.handle = vgpu_get_handle(g); | ||
477 | p->tsg_id = tsgid; | ||
478 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
479 | err = err ? err : msg.ret; | ||
480 | |||
481 | if (err) { | ||
482 | nvgpu_err(g, | ||
483 | "preempt tsg %u failed", tsgid); | ||
484 | } | ||
485 | |||
486 | return err; | ||
487 | } | ||
488 | |||
/*
 * Send a runlist (array of u16 channel ids) to the vgpu server.
 * The entries are too large for the command message itself, so they
 * travel through the shared out-of-band (OOB) buffer: copy the array
 * into the OOB area, then send the command that references it.
 * Returns 0 on success, -EINVAL/-ENOMEM on setup failures, -1 if the
 * server rejected the submit.
 */
static int vgpu_submit_runlist(struct gk20a *g, u64 handle, u8 runlist_id,
			       u16 *runlist, u32 num_entries)
{
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_runlist_params *p;
	int err;
	void *oob_handle;
	void *oob;
	size_t size, oob_size;

	/* grab the shared OOB buffer for the command queue */
	oob_handle = tegra_gr_comm_oob_get_ptr(TEGRA_GR_COMM_CTX_CLIENT,
			tegra_gr_comm_get_server_vmid(), TEGRA_VGPU_QUEUE_CMD,
			&oob, &oob_size);
	if (!oob_handle)
		return -EINVAL;

	/* the whole runlist must fit in the OOB area */
	size = sizeof(*runlist) * num_entries;
	if (oob_size < size) {
		err = -ENOMEM;
		goto done;
	}

	msg.cmd = TEGRA_VGPU_CMD_SUBMIT_RUNLIST;
	msg.handle = handle;
	p = &msg.params.runlist;
	p->runlist_id = runlist_id;
	p->num_entries = num_entries;

	/* entries must be in place before the command is sent */
	memcpy(oob, runlist, size);
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

	err = (err || msg.ret) ? -1 : 0;

done:
	tegra_gr_comm_oob_put_ptr(oob_handle);
	return err;
}
526 | |||
/*
 * Rebuild and submit one runlist with runlist->mutex already held.
 * chid == ~0 is a sentinel: with add=false it removes all channels
 * (suspend), with add=true it restores all active ones (resume);
 * in both sentinel cases the active-channel bitmap is left untouched.
 * Returns 0 on success (including no-op updates).
 */
static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
					u32 chid, bool add,
					bool wait_for_finish)
{
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_runlist_info_gk20a *runlist;
	u16 *runlist_entry = NULL;
	u32 count = 0;

	gk20a_dbg_fn("");

	runlist = &f->runlist_info[runlist_id];

	/* valid channel, add/remove it from active list.
	   Otherwise, keep active list untouched for suspend/resume. */
	if (chid != (u32)~0) {
		if (add) {
			/* already present -> nothing to submit */
			if (test_and_set_bit(chid,
				runlist->active_channels) == 1)
				return 0;
		} else {
			/* already absent -> nothing to submit */
			if (test_and_clear_bit(chid,
				runlist->active_channels) == 0)
				return 0;
		}
	}

	if (chid != (u32)~0 || /* add/remove a valid channel */
	    add /* resume to add all channels back */) {
		u32 cid;

		/* flatten the bitmap into a dense u16 id array in mem[0] */
		runlist_entry = runlist->mem[0].cpu_va;
		for_each_set_bit(cid,
			runlist->active_channels, f->num_channels) {
			gk20a_dbg_info("add channel %d to runlist", cid);
			runlist_entry[0] = cid;
			runlist_entry++;
			count++;
		}
	} else	/* suspend to remove all channels */
		count = 0;

	return vgpu_submit_runlist(g, vgpu_get_handle(g), runlist_id,
				runlist->mem[0].cpu_va, count);
}
572 | |||
573 | /* add/remove a channel from runlist | ||
574 | special cases below: runlist->active_channels will NOT be changed. | ||
575 | (chid == ~0 && !add) means remove all active channels from runlist. | ||
576 | (chid == ~0 && add) means restore all active channels on runlist. */ | ||
577 | int vgpu_fifo_update_runlist(struct gk20a *g, u32 runlist_id, | ||
578 | u32 chid, bool add, bool wait_for_finish) | ||
579 | { | ||
580 | struct fifo_runlist_info_gk20a *runlist = NULL; | ||
581 | struct fifo_gk20a *f = &g->fifo; | ||
582 | u32 ret = 0; | ||
583 | |||
584 | gk20a_dbg_fn(""); | ||
585 | |||
586 | runlist = &f->runlist_info[runlist_id]; | ||
587 | |||
588 | nvgpu_mutex_acquire(&runlist->mutex); | ||
589 | |||
590 | ret = vgpu_fifo_update_runlist_locked(g, runlist_id, chid, add, | ||
591 | wait_for_finish); | ||
592 | |||
593 | nvgpu_mutex_release(&runlist->mutex); | ||
594 | return ret; | ||
595 | } | ||
596 | |||
/*
 * No-op on vGPU: engine scheduling is owned by the server, so there is
 * nothing for the guest to wait on. Always succeeds.
 */
int vgpu_fifo_wait_engine_idle(struct gk20a *g)
{
	gk20a_dbg_fn("");
	return 0;
}
603 | |||
604 | static int vgpu_fifo_tsg_set_runlist_interleave(struct gk20a *g, | ||
605 | u32 tsgid, | ||
606 | u32 runlist_id, | ||
607 | u32 new_level) | ||
608 | { | ||
609 | struct tegra_vgpu_cmd_msg msg = {0}; | ||
610 | struct tegra_vgpu_tsg_runlist_interleave_params *p = | ||
611 | &msg.params.tsg_interleave; | ||
612 | int err; | ||
613 | |||
614 | gk20a_dbg_fn(""); | ||
615 | |||
616 | msg.cmd = TEGRA_VGPU_CMD_TSG_SET_RUNLIST_INTERLEAVE; | ||
617 | msg.handle = vgpu_get_handle(g); | ||
618 | p->tsg_id = tsgid; | ||
619 | p->level = new_level; | ||
620 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
621 | WARN_ON(err || msg.ret); | ||
622 | return err ? err : msg.ret; | ||
623 | } | ||
624 | |||
625 | int vgpu_fifo_set_runlist_interleave(struct gk20a *g, | ||
626 | u32 id, | ||
627 | bool is_tsg, | ||
628 | u32 runlist_id, | ||
629 | u32 new_level) | ||
630 | { | ||
631 | struct tegra_vgpu_cmd_msg msg; | ||
632 | struct tegra_vgpu_channel_runlist_interleave_params *p = | ||
633 | &msg.params.channel_interleave; | ||
634 | struct channel_gk20a *ch; | ||
635 | int err; | ||
636 | |||
637 | gk20a_dbg_fn(""); | ||
638 | |||
639 | if (is_tsg) | ||
640 | return vgpu_fifo_tsg_set_runlist_interleave(g, id, | ||
641 | runlist_id, new_level); | ||
642 | |||
643 | ch = &g->fifo.channel[id]; | ||
644 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_RUNLIST_INTERLEAVE; | ||
645 | msg.handle = vgpu_get_handle(ch->g); | ||
646 | p->handle = ch->virt_ctx; | ||
647 | p->level = new_level; | ||
648 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
649 | WARN_ON(err || msg.ret); | ||
650 | return err ? err : msg.ret; | ||
651 | } | ||
652 | |||
653 | int vgpu_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice) | ||
654 | { | ||
655 | struct tegra_vgpu_cmd_msg msg; | ||
656 | struct tegra_vgpu_channel_timeslice_params *p = | ||
657 | &msg.params.channel_timeslice; | ||
658 | int err; | ||
659 | |||
660 | gk20a_dbg_fn(""); | ||
661 | |||
662 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_TIMESLICE; | ||
663 | msg.handle = vgpu_get_handle(ch->g); | ||
664 | p->handle = ch->virt_ctx; | ||
665 | p->timeslice_us = timeslice; | ||
666 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
667 | err = err ? err : msg.ret; | ||
668 | WARN_ON(err); | ||
669 | if (!err) | ||
670 | ch->timeslice_us = p->timeslice_us; | ||
671 | return err; | ||
672 | } | ||
673 | |||
/*
 * Force-reset a channel (or every channel in its TSG) via the vgpu server.
 *
 * Error notifiers are set and channels are marked timed-out BEFORE the
 * reset RPC is issued, so userspace observers see the failure state no
 * matter how the server call goes.
 *
 * NOTE(review): the 'verbose' parameter is unused here — presumably kept
 * for gpu_ops interface compatibility with the native implementation;
 * confirm against the fifo.force_reset_ch hook.
 */
int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
			u32 err_code, bool verbose)
{
	struct tsg_gk20a *tsg = NULL;
	struct channel_gk20a *ch_tsg = NULL;
	struct gk20a *g = ch->g;
	struct tegra_vgpu_cmd_msg msg = {0};
	struct tegra_vgpu_channel_config_params *p =
		&msg.params.channel_config;
	int err;

	gk20a_dbg_fn("");

	if (gk20a_is_channel_marked_as_tsg(ch)) {
		/* TSG member: notify every live channel in the group. */
		tsg = &g->fifo.tsg[ch->tsgid];

		nvgpu_rwsem_down_read(&tsg->ch_list_lock);

		list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
			/* Take a reference so the channel cannot be freed
			 * while we touch it; skip channels already dying. */
			if (gk20a_channel_get(ch_tsg)) {
				gk20a_set_error_notifier(ch_tsg, err_code);
				ch_tsg->has_timedout = true;
				gk20a_channel_put(ch_tsg);
			}
		}

		nvgpu_rwsem_up_read(&tsg->ch_list_lock);
	} else {
		gk20a_set_error_notifier(ch, err_code);
		ch->has_timedout = true;
	}

	/* Ask the server to perform the actual reset of this channel. */
	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FORCE_RESET;
	msg.handle = vgpu_get_handle(ch->g);
	p->handle = ch->virt_ctx;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	WARN_ON(err || msg.ret);
	/* Abort local state once the RPC transport succeeded (even if the
	 * server reported a non-zero status in msg.ret). */
	if (!err)
		gk20a_channel_abort(ch, false);
	return err ? err : msg.ret;
}
715 | |||
716 | static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g, | ||
717 | struct channel_gk20a *ch) | ||
718 | { | ||
719 | nvgpu_mutex_acquire(&ch->error_notifier_mutex); | ||
720 | if (ch->error_notifier_ref) { | ||
721 | if (ch->error_notifier->status == 0xffff) { | ||
722 | /* If error code is already set, this mmu fault | ||
723 | * was triggered as part of recovery from other | ||
724 | * error condition. | ||
725 | * Don't overwrite error flag. */ | ||
726 | } else { | ||
727 | gk20a_set_error_notifier_locked(ch, | ||
728 | NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT); | ||
729 | } | ||
730 | } | ||
731 | nvgpu_mutex_release(&ch->error_notifier_mutex); | ||
732 | |||
733 | /* mark channel as faulted */ | ||
734 | ch->has_timedout = true; | ||
735 | nvgpu_smp_wmb(); | ||
736 | /* unblock pending waits */ | ||
737 | nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq); | ||
738 | nvgpu_cond_broadcast_interruptible(&ch->notifier_wq); | ||
739 | } | ||
740 | |||
741 | static void vgpu_fifo_set_ctx_mmu_error_ch_tsg(struct gk20a *g, | ||
742 | struct channel_gk20a *ch) | ||
743 | { | ||
744 | struct tsg_gk20a *tsg = NULL; | ||
745 | struct channel_gk20a *ch_tsg = NULL; | ||
746 | |||
747 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
748 | tsg = &g->fifo.tsg[ch->tsgid]; | ||
749 | |||
750 | nvgpu_rwsem_down_read(&tsg->ch_list_lock); | ||
751 | |||
752 | list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { | ||
753 | if (gk20a_channel_get(ch_tsg)) { | ||
754 | vgpu_fifo_set_ctx_mmu_error_ch(g, ch_tsg); | ||
755 | gk20a_channel_put(ch_tsg); | ||
756 | } | ||
757 | } | ||
758 | |||
759 | nvgpu_rwsem_up_read(&tsg->ch_list_lock); | ||
760 | } else { | ||
761 | vgpu_fifo_set_ctx_mmu_error_ch(g, ch); | ||
762 | } | ||
763 | } | ||
764 | |||
/*
 * Handle a stalling fifo interrupt forwarded by the vgpu server.
 *
 * Sets the appropriate error notifier on the affected channel; for MMU
 * faults the fault is also propagated to the channel's TSG (if any) and
 * the channel is aborted. Always returns 0.
 */
int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info)
{
	struct fifo_gk20a *f = &g->fifo;
	/* Take a reference; a dying channel returns NULL and is ignored. */
	struct channel_gk20a *ch = gk20a_channel_get(&f->channel[info->chid]);

	gk20a_dbg_fn("");
	if (!ch)
		return 0;

	nvgpu_err(g, "fifo intr (%d) on ch %u",
		info->type, info->chid);

	trace_gk20a_channel_reset(ch->chid, ch->tsgid);

	switch (info->type) {
	case TEGRA_VGPU_FIFO_INTR_PBDMA:
		gk20a_set_error_notifier(ch, NVGPU_CHANNEL_PBDMA_ERROR);
		break;
	case TEGRA_VGPU_FIFO_INTR_CTXSW_TIMEOUT:
		gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
		break;
	case TEGRA_VGPU_FIFO_INTR_MMU_FAULT:
		vgpu_fifo_set_ctx_mmu_error_ch_tsg(g, ch);
		gk20a_channel_abort(ch, false);
		break;
	default:
		/* Unknown interrupt type from the server. */
		WARN_ON(1);
		break;
	}

	/* Drop the reference taken above. */
	gk20a_channel_put(ch);
	return 0;
}
799 | |||
800 | int vgpu_fifo_nonstall_isr(struct gk20a *g, | ||
801 | struct tegra_vgpu_fifo_nonstall_intr_info *info) | ||
802 | { | ||
803 | gk20a_dbg_fn(""); | ||
804 | |||
805 | switch (info->type) { | ||
806 | case TEGRA_VGPU_FIFO_NONSTALL_INTR_CHANNEL: | ||
807 | gk20a_channel_semaphore_wakeup(g, false); | ||
808 | break; | ||
809 | default: | ||
810 | WARN_ON(1); | ||
811 | break; | ||
812 | } | ||
813 | |||
814 | return 0; | ||
815 | } | ||
816 | |||
817 | u32 vgpu_fifo_default_timeslice_us(struct gk20a *g) | ||
818 | { | ||
819 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
820 | |||
821 | return priv->constants.default_timeslice_us; | ||
822 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.h new file mode 100644 index 00000000..62a3a256 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.h | |||
@@ -0,0 +1,59 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef _FIFO_VGPU_H_ | ||
18 | #define _FIFO_VGPU_H_ | ||
19 | |||
20 | #include <nvgpu/types.h> | ||
21 | |||
22 | struct gk20a; | ||
23 | struct channel_gk20a; | ||
24 | struct fifo_gk20a; | ||
25 | struct tsg_gk20a; | ||
26 | |||
27 | int vgpu_init_fifo_setup_hw(struct gk20a *g); | ||
28 | void vgpu_channel_bind(struct channel_gk20a *ch); | ||
29 | void vgpu_channel_unbind(struct channel_gk20a *ch); | ||
30 | int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch); | ||
31 | void vgpu_channel_free_inst(struct gk20a *g, struct channel_gk20a *ch); | ||
32 | void vgpu_channel_enable(struct channel_gk20a *ch); | ||
33 | void vgpu_channel_disable(struct channel_gk20a *ch); | ||
34 | int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base, | ||
35 | u32 gpfifo_entries, | ||
36 | unsigned long acquire_timeout, u32 flags); | ||
37 | int vgpu_fifo_init_engine_info(struct fifo_gk20a *f); | ||
38 | int vgpu_fifo_preempt_channel(struct gk20a *g, u32 chid); | ||
39 | int vgpu_fifo_preempt_tsg(struct gk20a *g, u32 tsgid); | ||
40 | int vgpu_fifo_update_runlist(struct gk20a *g, u32 runlist_id, | ||
41 | u32 chid, bool add, bool wait_for_finish); | ||
42 | int vgpu_fifo_wait_engine_idle(struct gk20a *g); | ||
43 | int vgpu_fifo_set_runlist_interleave(struct gk20a *g, | ||
44 | u32 id, | ||
45 | bool is_tsg, | ||
46 | u32 runlist_id, | ||
47 | u32 new_level); | ||
48 | int vgpu_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice); | ||
49 | int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, | ||
50 | u32 err_code, bool verbose); | ||
51 | u32 vgpu_fifo_default_timeslice_us(struct gk20a *g); | ||
52 | int vgpu_tsg_open(struct tsg_gk20a *tsg); | ||
53 | int vgpu_tsg_bind_channel(struct tsg_gk20a *tsg, | ||
54 | struct channel_gk20a *ch); | ||
55 | int vgpu_tsg_unbind_channel(struct channel_gk20a *ch); | ||
56 | int vgpu_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice); | ||
57 | int vgpu_enable_tsg(struct tsg_gk20a *tsg); | ||
58 | |||
59 | #endif | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gm20b/vgpu_gr_gm20b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gm20b/vgpu_gr_gm20b.c new file mode 100644 index 00000000..260ce080 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gm20b/vgpu_gr_gm20b.c | |||
@@ -0,0 +1,63 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/enabled.h> | ||
18 | |||
19 | #include "gk20a/gk20a.h" | ||
20 | #include "gk20a/css_gr_gk20a.h" | ||
21 | #include "common/linux/vgpu/css_vgpu.h" | ||
22 | #include "vgpu_gr_gm20b.h" | ||
23 | |||
void vgpu_gr_gm20b_init_cyclestats(struct gk20a *g)
{
#if defined(CONFIG_GK20A_CYCLE_STATS)
	/* cyclestats not supported on vgpu */
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS, false);

	g->gr.max_css_buffer_size = vgpu_css_get_buffer_size(g);

	/* snapshots are supported only with a non-zero buffer size */
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT,
			g->gr.max_css_buffer_size != 0);
#endif
}
42 | |||
43 | int vgpu_gm20b_init_fs_state(struct gk20a *g) | ||
44 | { | ||
45 | struct gr_gk20a *gr = &g->gr; | ||
46 | u32 tpc_index, gpc_index; | ||
47 | u32 sm_id = 0; | ||
48 | |||
49 | gk20a_dbg_fn(""); | ||
50 | |||
51 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
52 | for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index]; | ||
53 | tpc_index++) { | ||
54 | g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index; | ||
55 | g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index; | ||
56 | |||
57 | sm_id++; | ||
58 | } | ||
59 | } | ||
60 | |||
61 | gr->no_of_sm = sm_id; | ||
62 | return 0; | ||
63 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gm20b/vgpu_gr_gm20b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gm20b/vgpu_gr_gm20b.h new file mode 100644 index 00000000..f17de450 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gm20b/vgpu_gr_gm20b.h | |||
@@ -0,0 +1,25 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __VGPU_GR_GM20B_H__ | ||
18 | #define __VGPU_GR_GM20B_H__ | ||
19 | |||
20 | #include "gk20a/gk20a.h" | ||
21 | |||
22 | void vgpu_gr_gm20b_init_cyclestats(struct gk20a *g); | ||
23 | int vgpu_gm20b_init_fs_state(struct gk20a *g); | ||
24 | |||
25 | #endif | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gm20b/vgpu_hal_gm20b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gm20b/vgpu_hal_gm20b.c new file mode 100644 index 00000000..1a2d378a --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gm20b/vgpu_hal_gm20b.c | |||
@@ -0,0 +1,582 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include "gm20b/hal_gm20b.h" | ||
18 | #include "common/linux/vgpu/vgpu.h" | ||
19 | #include "common/linux/vgpu/fifo_vgpu.h" | ||
20 | #include "common/linux/vgpu/gr_vgpu.h" | ||
21 | #include "common/linux/vgpu/ltc_vgpu.h" | ||
22 | #include "common/linux/vgpu/mm_vgpu.h" | ||
23 | #include "common/linux/vgpu/dbg_vgpu.h" | ||
24 | #include "common/linux/vgpu/fecs_trace_vgpu.h" | ||
25 | #include "common/linux/vgpu/css_vgpu.h" | ||
26 | #include "vgpu_gr_gm20b.h" | ||
27 | |||
28 | #include "gk20a/bus_gk20a.h" | ||
29 | #include "gk20a/flcn_gk20a.h" | ||
30 | #include "gk20a/mc_gk20a.h" | ||
31 | #include "gk20a/fb_gk20a.h" | ||
32 | |||
33 | #include "gm20b/gr_gm20b.h" | ||
34 | #include "gm20b/fifo_gm20b.h" | ||
35 | #include "gm20b/acr_gm20b.h" | ||
36 | #include "gm20b/pmu_gm20b.h" | ||
37 | #include "gm20b/fb_gm20b.h" | ||
38 | #include "gm20b/bus_gm20b.h" | ||
39 | #include "gm20b/regops_gm20b.h" | ||
40 | #include "gm20b/clk_gm20b.h" | ||
41 | #include "gm20b/therm_gm20b.h" | ||
42 | #include "gm20b/mm_gm20b.h" | ||
43 | #include "gm20b/gr_ctx_gm20b.h" | ||
44 | #include "gm20b/gm20b_gating_reglist.h" | ||
45 | #include "gm20b/ltc_gm20b.h" | ||
46 | |||
47 | #include <nvgpu/enabled.h> | ||
48 | |||
49 | #include <nvgpu/hw/gm20b/hw_fuse_gm20b.h> | ||
50 | #include <nvgpu/hw/gm20b/hw_pwr_gm20b.h> | ||
51 | #include <nvgpu/hw/gm20b/hw_fifo_gm20b.h> | ||
52 | #include <nvgpu/hw/gm20b/hw_ram_gm20b.h> | ||
53 | |||
54 | static const struct gpu_ops vgpu_gm20b_ops = { | ||
55 | .ltc = { | ||
56 | .determine_L2_size_bytes = vgpu_determine_L2_size_bytes, | ||
57 | .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, | ||
58 | .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, | ||
59 | .init_cbc = gm20b_ltc_init_cbc, | ||
60 | .init_fs_state = vgpu_ltc_init_fs_state, | ||
61 | .init_comptags = vgpu_ltc_init_comptags, | ||
62 | .cbc_ctrl = NULL, | ||
63 | .isr = gm20b_ltc_isr, | ||
64 | .cbc_fix_config = gm20b_ltc_cbc_fix_config, | ||
65 | .flush = gm20b_flush_ltc, | ||
66 | .set_enabled = gm20b_ltc_set_enabled, | ||
67 | }, | ||
68 | .ce2 = { | ||
69 | .isr_stall = gk20a_ce2_isr, | ||
70 | .isr_nonstall = gk20a_ce2_nonstall_isr, | ||
71 | .get_num_pce = vgpu_ce_get_num_pce, | ||
72 | }, | ||
73 | .gr = { | ||
74 | .get_patch_slots = gr_gk20a_get_patch_slots, | ||
75 | .init_gpc_mmu = gr_gm20b_init_gpc_mmu, | ||
76 | .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults, | ||
77 | .cb_size_default = gr_gm20b_cb_size_default, | ||
78 | .calc_global_ctx_buffer_size = | ||
79 | gr_gm20b_calc_global_ctx_buffer_size, | ||
80 | .commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb, | ||
81 | .commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb, | ||
82 | .commit_global_cb_manager = gr_gm20b_commit_global_cb_manager, | ||
83 | .commit_global_pagepool = gr_gm20b_commit_global_pagepool, | ||
84 | .handle_sw_method = gr_gm20b_handle_sw_method, | ||
85 | .set_alpha_circular_buffer_size = | ||
86 | gr_gm20b_set_alpha_circular_buffer_size, | ||
87 | .set_circular_buffer_size = gr_gm20b_set_circular_buffer_size, | ||
88 | .enable_hww_exceptions = gr_gk20a_enable_hww_exceptions, | ||
89 | .is_valid_class = gr_gm20b_is_valid_class, | ||
90 | .is_valid_gfx_class = gr_gm20b_is_valid_gfx_class, | ||
91 | .is_valid_compute_class = gr_gm20b_is_valid_compute_class, | ||
92 | .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs, | ||
93 | .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs, | ||
94 | .init_fs_state = vgpu_gm20b_init_fs_state, | ||
95 | .set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask, | ||
96 | .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, | ||
97 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, | ||
98 | .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask, | ||
99 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, | ||
100 | .free_channel_ctx = vgpu_gr_free_channel_ctx, | ||
101 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, | ||
102 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, | ||
103 | .get_zcull_info = vgpu_gr_get_zcull_info, | ||
104 | .is_tpc_addr = gr_gm20b_is_tpc_addr, | ||
105 | .get_tpc_num = gr_gm20b_get_tpc_num, | ||
106 | .detect_sm_arch = vgpu_gr_detect_sm_arch, | ||
107 | .add_zbc_color = gr_gk20a_add_zbc_color, | ||
108 | .add_zbc_depth = gr_gk20a_add_zbc_depth, | ||
109 | .zbc_set_table = vgpu_gr_add_zbc, | ||
110 | .zbc_query_table = vgpu_gr_query_zbc, | ||
111 | .pmu_save_zbc = gk20a_pmu_save_zbc, | ||
112 | .add_zbc = gr_gk20a_add_zbc, | ||
113 | .pagepool_default_size = gr_gm20b_pagepool_default_size, | ||
114 | .init_ctx_state = vgpu_gr_init_ctx_state, | ||
115 | .alloc_gr_ctx = vgpu_gr_alloc_gr_ctx, | ||
116 | .free_gr_ctx = vgpu_gr_free_gr_ctx, | ||
117 | .update_ctxsw_preemption_mode = | ||
118 | gr_gm20b_update_ctxsw_preemption_mode, | ||
119 | .dump_gr_regs = NULL, | ||
120 | .update_pc_sampling = gr_gm20b_update_pc_sampling, | ||
121 | .get_fbp_en_mask = vgpu_gr_get_fbp_en_mask, | ||
122 | .get_max_ltc_per_fbp = vgpu_gr_get_max_ltc_per_fbp, | ||
123 | .get_max_lts_per_ltc = vgpu_gr_get_max_lts_per_ltc, | ||
124 | .get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask, | ||
125 | .get_max_fbps_count = vgpu_gr_get_max_fbps_count, | ||
126 | .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info, | ||
127 | .wait_empty = gr_gk20a_wait_idle, | ||
128 | .init_cyclestats = vgpu_gr_gm20b_init_cyclestats, | ||
129 | .set_sm_debug_mode = vgpu_gr_set_sm_debug_mode, | ||
130 | .enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs, | ||
131 | .bpt_reg_info = gr_gm20b_bpt_reg_info, | ||
132 | .get_access_map = gr_gm20b_get_access_map, | ||
133 | .handle_fecs_error = gk20a_gr_handle_fecs_error, | ||
134 | .handle_sm_exception = gr_gk20a_handle_sm_exception, | ||
135 | .handle_tex_exception = gr_gk20a_handle_tex_exception, | ||
136 | .enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions, | ||
137 | .enable_exceptions = gk20a_gr_enable_exceptions, | ||
138 | .get_lrf_tex_ltc_dram_override = NULL, | ||
139 | .update_smpc_ctxsw_mode = vgpu_gr_update_smpc_ctxsw_mode, | ||
140 | .update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode, | ||
141 | .record_sm_error_state = gm20b_gr_record_sm_error_state, | ||
142 | .update_sm_error_state = gm20b_gr_update_sm_error_state, | ||
143 | .clear_sm_error_state = vgpu_gr_clear_sm_error_state, | ||
144 | .suspend_contexts = vgpu_gr_suspend_contexts, | ||
145 | .resume_contexts = vgpu_gr_resume_contexts, | ||
146 | .get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags, | ||
147 | .init_sm_id_table = gr_gk20a_init_sm_id_table, | ||
148 | .load_smid_config = gr_gm20b_load_smid_config, | ||
149 | .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering, | ||
150 | .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr, | ||
151 | .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr, | ||
152 | .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr, | ||
153 | .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr, | ||
154 | .setup_rop_mapping = gr_gk20a_setup_rop_mapping, | ||
155 | .program_zcull_mapping = gr_gk20a_program_zcull_mapping, | ||
156 | .commit_global_timeslice = gr_gk20a_commit_global_timeslice, | ||
157 | .commit_inst = vgpu_gr_commit_inst, | ||
158 | .write_zcull_ptr = gr_gk20a_write_zcull_ptr, | ||
159 | .write_pm_ptr = gr_gk20a_write_pm_ptr, | ||
160 | .init_elcg_mode = gr_gk20a_init_elcg_mode, | ||
161 | .load_tpc_mask = gr_gm20b_load_tpc_mask, | ||
162 | .inval_icache = gr_gk20a_inval_icache, | ||
163 | .trigger_suspend = gr_gk20a_trigger_suspend, | ||
164 | .wait_for_pause = gr_gk20a_wait_for_pause, | ||
165 | .resume_from_pause = gr_gk20a_resume_from_pause, | ||
166 | .clear_sm_errors = gr_gk20a_clear_sm_errors, | ||
167 | .tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions, | ||
168 | .get_esr_sm_sel = gk20a_gr_get_esr_sm_sel, | ||
169 | .sm_debugger_attached = gk20a_gr_sm_debugger_attached, | ||
170 | .suspend_single_sm = gk20a_gr_suspend_single_sm, | ||
171 | .suspend_all_sms = gk20a_gr_suspend_all_sms, | ||
172 | .resume_single_sm = gk20a_gr_resume_single_sm, | ||
173 | .resume_all_sms = gk20a_gr_resume_all_sms, | ||
174 | .get_sm_hww_warp_esr = gk20a_gr_get_sm_hww_warp_esr, | ||
175 | .get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr, | ||
176 | .get_sm_no_lock_down_hww_global_esr_mask = | ||
177 | gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask, | ||
178 | .lock_down_sm = gk20a_gr_lock_down_sm, | ||
179 | .wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down, | ||
180 | .clear_sm_hww = gm20b_gr_clear_sm_hww, | ||
181 | .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf, | ||
182 | .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs, | ||
183 | .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, | ||
184 | .init_ctxsw_hdr_data = gk20a_gr_init_ctxsw_hdr_data, | ||
185 | .set_boosted_ctx = NULL, | ||
186 | .update_boosted_ctx = NULL, | ||
187 | }, | ||
188 | .fb = { | ||
189 | .reset = fb_gk20a_reset, | ||
190 | .init_hw = gk20a_fb_init_hw, | ||
191 | .init_fs_state = fb_gm20b_init_fs_state, | ||
192 | .set_mmu_page_size = gm20b_fb_set_mmu_page_size, | ||
193 | .set_use_full_comp_tag_line = | ||
194 | gm20b_fb_set_use_full_comp_tag_line, | ||
195 | .compression_page_size = gm20b_fb_compression_page_size, | ||
196 | .compressible_page_size = gm20b_fb_compressible_page_size, | ||
197 | .vpr_info_fetch = gm20b_fb_vpr_info_fetch, | ||
198 | .dump_vpr_wpr_info = gm20b_fb_dump_vpr_wpr_info, | ||
199 | .read_wpr_info = gm20b_fb_read_wpr_info, | ||
200 | .is_debug_mode_enabled = NULL, | ||
201 | .set_debug_mode = vgpu_mm_mmu_set_debug_mode, | ||
202 | .tlb_invalidate = vgpu_mm_tlb_invalidate, | ||
203 | }, | ||
204 | .clock_gating = { | ||
205 | .slcg_bus_load_gating_prod = | ||
206 | gm20b_slcg_bus_load_gating_prod, | ||
207 | .slcg_ce2_load_gating_prod = | ||
208 | gm20b_slcg_ce2_load_gating_prod, | ||
209 | .slcg_chiplet_load_gating_prod = | ||
210 | gm20b_slcg_chiplet_load_gating_prod, | ||
211 | .slcg_ctxsw_firmware_load_gating_prod = | ||
212 | gm20b_slcg_ctxsw_firmware_load_gating_prod, | ||
213 | .slcg_fb_load_gating_prod = | ||
214 | gm20b_slcg_fb_load_gating_prod, | ||
215 | .slcg_fifo_load_gating_prod = | ||
216 | gm20b_slcg_fifo_load_gating_prod, | ||
217 | .slcg_gr_load_gating_prod = | ||
218 | gr_gm20b_slcg_gr_load_gating_prod, | ||
219 | .slcg_ltc_load_gating_prod = | ||
220 | ltc_gm20b_slcg_ltc_load_gating_prod, | ||
221 | .slcg_perf_load_gating_prod = | ||
222 | gm20b_slcg_perf_load_gating_prod, | ||
223 | .slcg_priring_load_gating_prod = | ||
224 | gm20b_slcg_priring_load_gating_prod, | ||
225 | .slcg_pmu_load_gating_prod = | ||
226 | gm20b_slcg_pmu_load_gating_prod, | ||
227 | .slcg_therm_load_gating_prod = | ||
228 | gm20b_slcg_therm_load_gating_prod, | ||
229 | .slcg_xbar_load_gating_prod = | ||
230 | gm20b_slcg_xbar_load_gating_prod, | ||
231 | .blcg_bus_load_gating_prod = | ||
232 | gm20b_blcg_bus_load_gating_prod, | ||
233 | .blcg_ctxsw_firmware_load_gating_prod = | ||
234 | gm20b_blcg_ctxsw_firmware_load_gating_prod, | ||
235 | .blcg_fb_load_gating_prod = | ||
236 | gm20b_blcg_fb_load_gating_prod, | ||
237 | .blcg_fifo_load_gating_prod = | ||
238 | gm20b_blcg_fifo_load_gating_prod, | ||
239 | .blcg_gr_load_gating_prod = | ||
240 | gm20b_blcg_gr_load_gating_prod, | ||
241 | .blcg_ltc_load_gating_prod = | ||
242 | gm20b_blcg_ltc_load_gating_prod, | ||
243 | .blcg_pwr_csb_load_gating_prod = | ||
244 | gm20b_blcg_pwr_csb_load_gating_prod, | ||
245 | .blcg_xbar_load_gating_prod = | ||
246 | gm20b_blcg_xbar_load_gating_prod, | ||
247 | .blcg_pmu_load_gating_prod = | ||
248 | gm20b_blcg_pmu_load_gating_prod, | ||
249 | .pg_gr_load_gating_prod = | ||
250 | gr_gm20b_pg_gr_load_gating_prod, | ||
251 | }, | ||
252 | .fifo = { | ||
253 | .init_fifo_setup_hw = vgpu_init_fifo_setup_hw, | ||
254 | .bind_channel = vgpu_channel_bind, | ||
255 | .unbind_channel = vgpu_channel_unbind, | ||
256 | .disable_channel = vgpu_channel_disable, | ||
257 | .enable_channel = vgpu_channel_enable, | ||
258 | .alloc_inst = vgpu_channel_alloc_inst, | ||
259 | .free_inst = vgpu_channel_free_inst, | ||
260 | .setup_ramfc = vgpu_channel_setup_ramfc, | ||
261 | .channel_set_timeslice = vgpu_channel_set_timeslice, | ||
262 | .default_timeslice_us = vgpu_fifo_default_timeslice_us, | ||
263 | .setup_userd = gk20a_fifo_setup_userd, | ||
264 | .userd_gp_get = gk20a_fifo_userd_gp_get, | ||
265 | .userd_gp_put = gk20a_fifo_userd_gp_put, | ||
266 | .userd_pb_get = gk20a_fifo_userd_pb_get, | ||
267 | .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val, | ||
268 | .preempt_channel = vgpu_fifo_preempt_channel, | ||
269 | .preempt_tsg = vgpu_fifo_preempt_tsg, | ||
270 | .enable_tsg = vgpu_enable_tsg, | ||
271 | .disable_tsg = gk20a_disable_tsg, | ||
272 | .tsg_verify_channel_status = NULL, | ||
273 | .tsg_verify_status_ctx_reload = NULL, | ||
274 | .update_runlist = vgpu_fifo_update_runlist, | ||
275 | .trigger_mmu_fault = gm20b_fifo_trigger_mmu_fault, | ||
276 | .get_mmu_fault_info = gk20a_fifo_get_mmu_fault_info, | ||
277 | .wait_engine_idle = vgpu_fifo_wait_engine_idle, | ||
278 | .get_num_fifos = gm20b_fifo_get_num_fifos, | ||
279 | .get_pbdma_signature = gk20a_fifo_get_pbdma_signature, | ||
280 | .set_runlist_interleave = vgpu_fifo_set_runlist_interleave, | ||
281 | .tsg_set_timeslice = vgpu_tsg_set_timeslice, | ||
282 | .tsg_open = vgpu_tsg_open, | ||
283 | .force_reset_ch = vgpu_fifo_force_reset_ch, | ||
284 | .engine_enum_from_type = gk20a_fifo_engine_enum_from_type, | ||
285 | .device_info_data_parse = gm20b_device_info_data_parse, | ||
286 | .eng_runlist_base_size = fifo_eng_runlist_base__size_1_v, | ||
287 | .init_engine_info = vgpu_fifo_init_engine_info, | ||
288 | .runlist_entry_size = ram_rl_entry_size_v, | ||
289 | .get_tsg_runlist_entry = gk20a_get_tsg_runlist_entry, | ||
290 | .get_ch_runlist_entry = gk20a_get_ch_runlist_entry, | ||
291 | .is_fault_engine_subid_gpc = gk20a_is_fault_engine_subid_gpc, | ||
292 | .dump_pbdma_status = gk20a_dump_pbdma_status, | ||
293 | .dump_eng_status = gk20a_dump_eng_status, | ||
294 | .dump_channel_status_ramfc = gk20a_dump_channel_status_ramfc, | ||
295 | .intr_0_error_mask = gk20a_fifo_intr_0_error_mask, | ||
296 | .is_preempt_pending = gk20a_fifo_is_preempt_pending, | ||
297 | .init_pbdma_intr_descs = gm20b_fifo_init_pbdma_intr_descs, | ||
298 | .reset_enable_hw = gk20a_init_fifo_reset_enable_hw, | ||
299 | .teardown_ch_tsg = gk20a_fifo_teardown_ch_tsg, | ||
300 | .handle_sched_error = gk20a_fifo_handle_sched_error, | ||
301 | .handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0, | ||
302 | .handle_pbdma_intr_1 = gk20a_fifo_handle_pbdma_intr_1, | ||
303 | .tsg_bind_channel = vgpu_tsg_bind_channel, | ||
304 | .tsg_unbind_channel = vgpu_tsg_unbind_channel, | ||
305 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
306 | .alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf, | ||
307 | .free_syncpt_buf = gk20a_fifo_free_syncpt_buf, | ||
308 | .add_syncpt_wait_cmd = gk20a_fifo_add_syncpt_wait_cmd, | ||
309 | .get_syncpt_wait_cmd_size = gk20a_fifo_get_syncpt_wait_cmd_size, | ||
310 | .add_syncpt_incr_cmd = gk20a_fifo_add_syncpt_incr_cmd, | ||
311 | .get_syncpt_incr_cmd_size = gk20a_fifo_get_syncpt_incr_cmd_size, | ||
312 | #endif | ||
313 | }, | ||
314 | .gr_ctx = { | ||
315 | .get_netlist_name = gr_gm20b_get_netlist_name, | ||
316 | .is_fw_defined = gr_gm20b_is_firmware_defined, | ||
317 | }, | ||
318 | .mm = { | ||
319 | .support_sparse = gm20b_mm_support_sparse, | ||
320 | .gmmu_map = vgpu_locked_gmmu_map, | ||
321 | .gmmu_unmap = vgpu_locked_gmmu_unmap, | ||
322 | .vm_bind_channel = vgpu_vm_bind_channel, | ||
323 | .fb_flush = vgpu_mm_fb_flush, | ||
324 | .l2_invalidate = vgpu_mm_l2_invalidate, | ||
325 | .l2_flush = vgpu_mm_l2_flush, | ||
326 | .cbc_clean = gk20a_mm_cbc_clean, | ||
327 | .set_big_page_size = gm20b_mm_set_big_page_size, | ||
328 | .get_big_page_sizes = gm20b_mm_get_big_page_sizes, | ||
329 | .get_default_big_page_size = gm20b_mm_get_default_big_page_size, | ||
330 | .gpu_phys_addr = gm20b_gpu_phys_addr, | ||
331 | .get_iommu_bit = gk20a_mm_get_iommu_bit, | ||
332 | .get_mmu_levels = gk20a_mm_get_mmu_levels, | ||
333 | .init_pdb = gk20a_mm_init_pdb, | ||
334 | .init_mm_setup_hw = NULL, | ||
335 | .is_bar1_supported = gm20b_mm_is_bar1_supported, | ||
336 | .init_inst_block = gk20a_init_inst_block, | ||
337 | .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, | ||
338 | .get_kind_invalid = gm20b_get_kind_invalid, | ||
339 | .get_kind_pitch = gm20b_get_kind_pitch, | ||
340 | }, | ||
341 | .therm = { | ||
342 | .init_therm_setup_hw = gm20b_init_therm_setup_hw, | ||
343 | .elcg_init_idle_filters = gk20a_elcg_init_idle_filters, | ||
344 | }, | ||
345 | .pmu = { | ||
346 | .pmu_setup_elpg = gm20b_pmu_setup_elpg, | ||
347 | .pmu_get_queue_head = pwr_pmu_queue_head_r, | ||
348 | .pmu_get_queue_head_size = pwr_pmu_queue_head__size_1_v, | ||
349 | .pmu_get_queue_tail = pwr_pmu_queue_tail_r, | ||
350 | .pmu_get_queue_tail_size = pwr_pmu_queue_tail__size_1_v, | ||
351 | .pmu_queue_head = gk20a_pmu_queue_head, | ||
352 | .pmu_queue_tail = gk20a_pmu_queue_tail, | ||
353 | .pmu_msgq_tail = gk20a_pmu_msgq_tail, | ||
354 | .pmu_mutex_size = pwr_pmu_mutex__size_1_v, | ||
355 | .pmu_mutex_acquire = gk20a_pmu_mutex_acquire, | ||
356 | .pmu_mutex_release = gk20a_pmu_mutex_release, | ||
357 | .write_dmatrfbase = gm20b_write_dmatrfbase, | ||
358 | .pmu_elpg_statistics = gk20a_pmu_elpg_statistics, | ||
359 | .pmu_pg_init_param = NULL, | ||
360 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, | ||
361 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, | ||
362 | .pmu_is_lpwr_feature_supported = NULL, | ||
363 | .pmu_lpwr_enable_pg = NULL, | ||
364 | .pmu_lpwr_disable_pg = NULL, | ||
365 | .pmu_pg_param_post_init = NULL, | ||
366 | .dump_secure_fuses = pmu_dump_security_fuses_gm20b, | ||
367 | .reset_engine = gk20a_pmu_engine_reset, | ||
368 | .is_engine_in_reset = gk20a_pmu_is_engine_in_reset, | ||
369 | }, | ||
370 | .clk = { | ||
371 | .init_clk_support = gm20b_init_clk_support, | ||
372 | .suspend_clk_support = gm20b_suspend_clk_support, | ||
373 | #ifdef CONFIG_DEBUG_FS | ||
374 | .init_debugfs = gm20b_clk_init_debugfs, | ||
375 | #endif | ||
376 | .get_voltage = gm20b_clk_get_voltage, | ||
377 | .get_gpcclk_clock_counter = gm20b_clk_get_gpcclk_clock_counter, | ||
378 | .pll_reg_write = gm20b_clk_pll_reg_write, | ||
379 | .get_pll_debug_data = gm20b_clk_get_pll_debug_data, | ||
380 | }, | ||
381 | .regops = { | ||
382 | .get_global_whitelist_ranges = | ||
383 | gm20b_get_global_whitelist_ranges, | ||
384 | .get_global_whitelist_ranges_count = | ||
385 | gm20b_get_global_whitelist_ranges_count, | ||
386 | .get_context_whitelist_ranges = | ||
387 | gm20b_get_context_whitelist_ranges, | ||
388 | .get_context_whitelist_ranges_count = | ||
389 | gm20b_get_context_whitelist_ranges_count, | ||
390 | .get_runcontrol_whitelist = gm20b_get_runcontrol_whitelist, | ||
391 | .get_runcontrol_whitelist_count = | ||
392 | gm20b_get_runcontrol_whitelist_count, | ||
393 | .get_runcontrol_whitelist_ranges = | ||
394 | gm20b_get_runcontrol_whitelist_ranges, | ||
395 | .get_runcontrol_whitelist_ranges_count = | ||
396 | gm20b_get_runcontrol_whitelist_ranges_count, | ||
397 | .get_qctl_whitelist = gm20b_get_qctl_whitelist, | ||
398 | .get_qctl_whitelist_count = gm20b_get_qctl_whitelist_count, | ||
399 | .get_qctl_whitelist_ranges = gm20b_get_qctl_whitelist_ranges, | ||
400 | .get_qctl_whitelist_ranges_count = | ||
401 | gm20b_get_qctl_whitelist_ranges_count, | ||
402 | .apply_smpc_war = gm20b_apply_smpc_war, | ||
403 | }, | ||
404 | .mc = { | ||
405 | .intr_enable = mc_gk20a_intr_enable, | ||
406 | .intr_unit_config = mc_gk20a_intr_unit_config, | ||
407 | .isr_stall = mc_gk20a_isr_stall, | ||
408 | .intr_stall = mc_gk20a_intr_stall, | ||
409 | .intr_stall_pause = mc_gk20a_intr_stall_pause, | ||
410 | .intr_stall_resume = mc_gk20a_intr_stall_resume, | ||
411 | .intr_nonstall = mc_gk20a_intr_nonstall, | ||
412 | .intr_nonstall_pause = mc_gk20a_intr_nonstall_pause, | ||
413 | .intr_nonstall_resume = mc_gk20a_intr_nonstall_resume, | ||
414 | .enable = gk20a_mc_enable, | ||
415 | .disable = gk20a_mc_disable, | ||
416 | .reset = gk20a_mc_reset, | ||
417 | .boot_0 = gk20a_mc_boot_0, | ||
418 | .is_intr1_pending = mc_gk20a_is_intr1_pending, | ||
419 | }, | ||
420 | .debug = { | ||
421 | .show_dump = NULL, | ||
422 | }, | ||
423 | .dbg_session_ops = { | ||
424 | .exec_reg_ops = vgpu_exec_regops, | ||
425 | .dbg_set_powergate = vgpu_dbg_set_powergate, | ||
426 | .check_and_set_global_reservation = | ||
427 | vgpu_check_and_set_global_reservation, | ||
428 | .check_and_set_context_reservation = | ||
429 | vgpu_check_and_set_context_reservation, | ||
430 | .release_profiler_reservation = | ||
431 | vgpu_release_profiler_reservation, | ||
432 | .perfbuffer_enable = vgpu_perfbuffer_enable, | ||
433 | .perfbuffer_disable = vgpu_perfbuffer_disable, | ||
434 | }, | ||
435 | .bus = { | ||
436 | .init_hw = gk20a_bus_init_hw, | ||
437 | .isr = gk20a_bus_isr, | ||
438 | .read_ptimer = vgpu_read_ptimer, | ||
439 | .get_timestamps_zipper = vgpu_get_timestamps_zipper, | ||
440 | .bar1_bind = gm20b_bus_bar1_bind, | ||
441 | }, | ||
442 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
443 | .css = { | ||
444 | .enable_snapshot = vgpu_css_enable_snapshot_buffer, | ||
445 | .disable_snapshot = vgpu_css_release_snapshot_buffer, | ||
446 | .check_data_available = vgpu_css_flush_snapshots, | ||
447 | .detach_snapshot = vgpu_css_detach, | ||
448 | .set_handled_snapshots = NULL, | ||
449 | .allocate_perfmon_ids = NULL, | ||
450 | .release_perfmon_ids = NULL, | ||
451 | }, | ||
452 | #endif | ||
453 | .falcon = { | ||
454 | .falcon_hal_sw_init = gk20a_falcon_hal_sw_init, | ||
455 | }, | ||
456 | .priv_ring = { | ||
457 | .isr = gk20a_priv_ring_isr, | ||
458 | }, | ||
459 | .chip_init_gpu_characteristics = vgpu_init_gpu_characteristics, | ||
460 | .get_litter_value = gm20b_get_litter_value, | ||
461 | }; | ||
462 | |||
/*
 * Install the gm20b vGPU HAL into g->ops.
 *
 * Copies the static vgpu_gm20b_ops table into the live gpu_ops, then
 * resolves the privilege-security configuration (from fuses or build
 * config) and patches in the PMU/GR ops that depend on it.
 *
 * Returns 0 on success; -EPERM (non-ACR builds only) when the HW has
 * priv security enabled but the build cannot support it.
 */
int vgpu_gm20b_init_hal(struct gk20a *g)
{
	struct gpu_ops *gops = &g->ops;
	u32 val;

	/* Bulk-copy whole HAL units from the static vGPU gm20b table. */
	gops->ltc = vgpu_gm20b_ops.ltc;
	gops->ce2 = vgpu_gm20b_ops.ce2;
	gops->gr = vgpu_gm20b_ops.gr;
	gops->fb = vgpu_gm20b_ops.fb;
	gops->clock_gating = vgpu_gm20b_ops.clock_gating;
	gops->fifo = vgpu_gm20b_ops.fifo;
	gops->gr_ctx = vgpu_gm20b_ops.gr_ctx;
	gops->mm = vgpu_gm20b_ops.mm;
	gops->therm = vgpu_gm20b_ops.therm;
	gops->pmu = vgpu_gm20b_ops.pmu;
	/*
	 * clk must be assigned member by member
	 * since some clk ops are assigned during probe prior to HAL init
	 */
	gops->clk.init_clk_support = vgpu_gm20b_ops.clk.init_clk_support;
	gops->clk.suspend_clk_support = vgpu_gm20b_ops.clk.suspend_clk_support;
	gops->clk.get_voltage = vgpu_gm20b_ops.clk.get_voltage;
	gops->clk.get_gpcclk_clock_counter =
		vgpu_gm20b_ops.clk.get_gpcclk_clock_counter;
	gops->clk.pll_reg_write = vgpu_gm20b_ops.clk.pll_reg_write;
	gops->clk.get_pll_debug_data = vgpu_gm20b_ops.clk.get_pll_debug_data;

	gops->regops = vgpu_gm20b_ops.regops;
	gops->mc = vgpu_gm20b_ops.mc;
	gops->dbg_session_ops = vgpu_gm20b_ops.dbg_session_ops;
	gops->debug = vgpu_gm20b_ops.debug;
	gops->bus = vgpu_gm20b_ops.bus;
#if defined(CONFIG_GK20A_CYCLE_STATS)
	gops->css = vgpu_gm20b_ops.css;
#endif
	gops->falcon = vgpu_gm20b_ops.falcon;

	gops->priv_ring = vgpu_gm20b_ops.priv_ring;

	/* Lone functions */
	gops->chip_init_gpu_characteristics =
		vgpu_gm20b_ops.chip_init_gpu_characteristics;
	gops->get_litter_value = vgpu_gm20b_ops.get_litter_value;

	__nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true);
	__nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, false);
	__nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false);

	/*
	 * Decide NVGPU_SEC_PRIVSECURITY: on FMODEL it is fixed by the
	 * build; otherwise the fuse register is authoritative.
	 */
#ifdef CONFIG_TEGRA_ACR
	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
		__nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true);
	} else {
		val = gk20a_readl(g, fuse_opt_priv_sec_en_r());
		if (!val) {
			gk20a_dbg_info("priv security is disabled in HW");
			__nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false);
		} else {
			__nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true);
		}
	}
#else
	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
		gk20a_dbg_info("running ASIM with PRIV security disabled");
		__nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false);
	} else {
		val = gk20a_readl(g, fuse_opt_priv_sec_en_r());
		if (!val) {
			__nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false);
		} else {
			/* HW demands priv security but ACR support is not built in. */
			gk20a_dbg_info("priv security is not supported but enabled");
			__nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true);
			return -EPERM;
		}
	}
#endif

	/* priv security dependent ops */
	if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
		/* Add in ops from gm20b acr */
		gops->pmu.is_pmu_supported = gm20b_is_pmu_supported;
		gops->pmu.prepare_ucode = prepare_ucode_blob;
		gops->pmu.pmu_setup_hw_and_bootstrap = gm20b_bootstrap_hs_flcn;
		gops->pmu.is_lazy_bootstrap = gm20b_is_lazy_bootstrap;
		gops->pmu.is_priv_load = gm20b_is_priv_load;
		gops->pmu.get_wpr = gm20b_wpr_info;
		gops->pmu.alloc_blob_space = gm20b_alloc_blob_space;
		gops->pmu.pmu_populate_loader_cfg =
			gm20b_pmu_populate_loader_cfg;
		gops->pmu.flcn_populate_bl_dmem_desc =
			gm20b_flcn_populate_bl_dmem_desc;
		gops->pmu.falcon_wait_for_halt = pmu_wait_for_halt;
		gops->pmu.falcon_clear_halt_interrupt_status =
			clear_halt_interrupt_status;
		gops->pmu.init_falcon_setup_hw = gm20b_init_pmu_setup_hw1;

		gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
		gops->pmu.load_lsfalcon_ucode = gm20b_load_falcon_ucode;

		gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
	} else {
		/* Inherit from gk20a */
		gops->pmu.is_pmu_supported = gk20a_is_pmu_supported;
		gops->pmu.prepare_ucode = nvgpu_pmu_prepare_ns_ucode_blob;
		gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1;
		gops->pmu.pmu_nsbootstrap = pmu_bootstrap;

		gops->pmu.load_lsfalcon_ucode = NULL;
		gops->pmu.init_wpr_region = NULL;

		gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
	}

	/* Reset bootstrap bookkeeping for a fresh HAL init. */
	__nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
	g->pmu_lsf_pmu_wpr_init_done = 0;
	g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;

	g->name = "gm20b";

	return 0;
}
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_fifo_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_fifo_gp10b.c new file mode 100644 index 00000000..cc006f76 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_fifo_gp10b.c | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include "vgpu_fifo_gp10b.h" | ||
18 | |||
19 | void vgpu_gp10b_init_fifo_ops(struct gpu_ops *gops) | ||
20 | { | ||
21 | /* syncpoint protection not supported yet */ | ||
22 | gops->fifo.resetup_ramfc = NULL; | ||
23 | gops->fifo.reschedule_runlist = NULL; | ||
24 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c new file mode 100644 index 00000000..efc9c595 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c | |||
@@ -0,0 +1,332 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/kmem.h> | ||
18 | #include <nvgpu/dma.h> | ||
19 | #include <nvgpu/bug.h> | ||
20 | |||
21 | #include "common/linux/vgpu/vgpu.h" | ||
22 | #include "common/linux/vgpu/gm20b/vgpu_gr_gm20b.h" | ||
23 | |||
24 | #include "vgpu_gr_gp10b.h" | ||
25 | |||
26 | #include <nvgpu/hw/gp10b/hw_gr_gp10b.h> | ||
27 | |||
28 | void vgpu_gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | ||
29 | struct gr_ctx_desc *gr_ctx) | ||
30 | { | ||
31 | struct tegra_vgpu_cmd_msg msg = {0}; | ||
32 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; | ||
33 | int err; | ||
34 | |||
35 | gk20a_dbg_fn(""); | ||
36 | |||
37 | if (!gr_ctx || !gr_ctx->mem.gpu_va) | ||
38 | return; | ||
39 | |||
40 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE; | ||
41 | msg.handle = vgpu_get_handle(g); | ||
42 | p->gr_ctx_handle = gr_ctx->virt_ctx; | ||
43 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
44 | WARN_ON(err || msg.ret); | ||
45 | |||
46 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, gmmu_page_size_kernel); | ||
47 | |||
48 | nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer); | ||
49 | nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer); | ||
50 | nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer); | ||
51 | nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer); | ||
52 | |||
53 | nvgpu_kfree(g, gr_ctx); | ||
54 | } | ||
55 | |||
56 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, | ||
57 | struct gr_ctx_desc **__gr_ctx, | ||
58 | struct vm_gk20a *vm, | ||
59 | u32 class, | ||
60 | u32 flags) | ||
61 | { | ||
62 | struct gr_ctx_desc *gr_ctx; | ||
63 | u32 graphics_preempt_mode = 0; | ||
64 | u32 compute_preempt_mode = 0; | ||
65 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
66 | int err; | ||
67 | |||
68 | gk20a_dbg_fn(""); | ||
69 | |||
70 | err = vgpu_gr_alloc_gr_ctx(g, __gr_ctx, vm, class, flags); | ||
71 | if (err) | ||
72 | return err; | ||
73 | |||
74 | gr_ctx = *__gr_ctx; | ||
75 | |||
76 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) | ||
77 | graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; | ||
78 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) | ||
79 | compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; | ||
80 | |||
81 | if (priv->constants.force_preempt_mode && !graphics_preempt_mode && | ||
82 | !compute_preempt_mode) { | ||
83 | graphics_preempt_mode = g->ops.gr.is_valid_gfx_class(g, class) ? | ||
84 | NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP : 0; | ||
85 | compute_preempt_mode = | ||
86 | g->ops.gr.is_valid_compute_class(g, class) ? | ||
87 | NVGPU_PREEMPTION_MODE_COMPUTE_CTA : 0; | ||
88 | } | ||
89 | |||
90 | if (graphics_preempt_mode || compute_preempt_mode) { | ||
91 | if (g->ops.gr.set_ctxsw_preemption_mode) { | ||
92 | err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, | ||
93 | class, graphics_preempt_mode, compute_preempt_mode); | ||
94 | if (err) { | ||
95 | nvgpu_err(g, | ||
96 | "set_ctxsw_preemption_mode failed"); | ||
97 | goto fail; | ||
98 | } | ||
99 | } else { | ||
100 | err = -ENOSYS; | ||
101 | goto fail; | ||
102 | } | ||
103 | } | ||
104 | |||
105 | gk20a_dbg_fn("done"); | ||
106 | return err; | ||
107 | |||
108 | fail: | ||
109 | vgpu_gr_gp10b_free_gr_ctx(g, vm, gr_ctx); | ||
110 | return err; | ||
111 | } | ||
112 | |||
/*
 * Program graphics/compute ctxsw preemption modes on a vGPU gr context.
 *
 * For GfxP graphics preemption this allocates the preempt, spill,
 * pagepool, and betacb ctxsw buffers in the guest VM and advertises
 * their GPU VAs to the RM server via one BIND_GR_CTXSW_BUFFERS message.
 *
 * Returns 0 on success, -EINVAL for an invalid mode combination,
 * -ENOMEM on allocation or RPC failure.
 * NOTE(review): buffers allocated before a later failure are not freed
 * here; presumably the caller's gr-ctx free path reclaims them — verify.
 */
int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
				struct gr_ctx_desc *gr_ctx,
				struct vm_gk20a *vm, u32 class,
				u32 graphics_preempt_mode,
				u32 compute_preempt_mode)
{
	struct tegra_vgpu_cmd_msg msg = {};
	struct tegra_vgpu_gr_bind_ctxsw_buffers_params *p =
				&msg.params.gr_bind_ctxsw_buffers;
	int err = 0;

	/* Debug knobs may force a fixed mode for the matching class. */
	if (g->ops.gr.is_valid_gfx_class(g, class) &&
			g->gr.t18x.ctx_vars.force_preemption_gfxp)
		graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;

	if (g->ops.gr.is_valid_compute_class(g, class) &&
			g->gr.t18x.ctx_vars.force_preemption_cilp)
		compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;

	/* check for invalid combinations */
	if ((graphics_preempt_mode == 0) && (compute_preempt_mode == 0))
		return -EINVAL;

	/* GfxP graphics together with CILP compute is not allowed. */
	if ((graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) &&
		   (compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP))
		return -EINVAL;

	/* set preemption modes */
	switch (graphics_preempt_mode) {
	case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
	{
		/* Buffer sizes derived from HW granularities and gr config. */
		u32 spill_size =
			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
		u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
			gr_scc_pagepool_total_pages_byte_granularity_v();
		u32 betacb_size = g->gr.attrib_cb_default_size +
				(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
				 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
		u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
				  gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
				  g->gr.max_tpc_count;
		struct nvgpu_mem *desc;

		attrib_cb_size = ALIGN(attrib_cb_size, 128);

		gk20a_dbg_info("gfxp context preempt size=%d",
			g->gr.t18x.ctx_vars.preempt_image_size);
		gk20a_dbg_info("gfxp context spill size=%d", spill_size);
		gk20a_dbg_info("gfxp context pagepool size=%d", pagepool_size);
		gk20a_dbg_info("gfxp context attrib cb size=%d",
			attrib_cb_size);

		/* Main (preempt image) buffer. */
		err = gr_gp10b_alloc_buffer(vm,
					g->gr.t18x.ctx_vars.preempt_image_size,
					&gr_ctx->t18x.preempt_ctxsw_buffer);
		if (err) {
			err = -ENOMEM;
			goto fail;
		}
		desc = &gr_ctx->t18x.preempt_ctxsw_buffer;
		p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAIN] = desc->gpu_va;
		p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAIN] = desc->size;

		/* Spill buffer. */
		err = gr_gp10b_alloc_buffer(vm,
					spill_size,
					&gr_ctx->t18x.spill_ctxsw_buffer);
		if (err) {
			err = -ENOMEM;
			goto fail;
		}
		desc = &gr_ctx->t18x.spill_ctxsw_buffer;
		p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_SPILL] = desc->gpu_va;
		p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_SPILL] = desc->size;

		/* Pagepool buffer. */
		err = gr_gp10b_alloc_buffer(vm,
					pagepool_size,
					&gr_ctx->t18x.pagepool_ctxsw_buffer);
		if (err) {
			err = -ENOMEM;
			goto fail;
		}
		desc = &gr_ctx->t18x.pagepool_ctxsw_buffer;
		p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_PAGEPOOL] =
			desc->gpu_va;
		p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_PAGEPOOL] = desc->size;

		/* Beta circular buffer (sized as the attrib cb). */
		err = gr_gp10b_alloc_buffer(vm,
					attrib_cb_size,
					&gr_ctx->t18x.betacb_ctxsw_buffer);
		if (err) {
			err = -ENOMEM;
			goto fail;
		}
		desc = &gr_ctx->t18x.betacb_ctxsw_buffer;
		p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_BETACB] =
			desc->gpu_va;
		p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_BETACB] = desc->size;

		gr_ctx->graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
		p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_GFX_GFXP;
		break;
	}
	case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
		gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
		break;

	default:
		break;
	}

	/* Compute mode is honored only for compute-capable classes. */
	if (g->ops.gr.is_valid_compute_class(g, class)) {
		switch (compute_preempt_mode) {
		case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
			gr_ctx->compute_preempt_mode =
				NVGPU_PREEMPTION_MODE_COMPUTE_WFI;
			p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_WFI;
			break;
		case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
			gr_ctx->compute_preempt_mode =
				NVGPU_PREEMPTION_MODE_COMPUTE_CTA;
			p->mode =
				TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CTA;
			break;
		case NVGPU_PREEMPTION_MODE_COMPUTE_CILP:
			gr_ctx->compute_preempt_mode =
				NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
			p->mode =
				TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CILP;
			break;
		default:
			break;
		}
	}

	/* Issue the bind RPC only if some mode actually took effect. */
	if (gr_ctx->graphics_preempt_mode || gr_ctx->compute_preempt_mode) {
		msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTXSW_BUFFERS;
		msg.handle = vgpu_get_handle(g);
		p->gr_ctx_handle = gr_ctx->virt_ctx;
		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
		if (err || msg.ret) {
			err = -ENOMEM;
			goto fail;
		}
	}

	return err;

fail:
	nvgpu_err(g, "%s failed %d", __func__, err);
	return err;
}
265 | |||
266 | int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | ||
267 | u32 graphics_preempt_mode, | ||
268 | u32 compute_preempt_mode) | ||
269 | { | ||
270 | struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; | ||
271 | struct gk20a *g = ch->g; | ||
272 | struct tsg_gk20a *tsg; | ||
273 | struct vm_gk20a *vm; | ||
274 | u32 class; | ||
275 | int err; | ||
276 | |||
277 | class = ch->obj_class; | ||
278 | if (!class) | ||
279 | return -EINVAL; | ||
280 | |||
281 | /* skip setting anything if both modes are already set */ | ||
282 | if (graphics_preempt_mode && | ||
283 | (graphics_preempt_mode == gr_ctx->graphics_preempt_mode)) | ||
284 | graphics_preempt_mode = 0; | ||
285 | |||
286 | if (compute_preempt_mode && | ||
287 | (compute_preempt_mode == gr_ctx->compute_preempt_mode)) | ||
288 | compute_preempt_mode = 0; | ||
289 | |||
290 | if (graphics_preempt_mode == 0 && compute_preempt_mode == 0) | ||
291 | return 0; | ||
292 | |||
293 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
294 | tsg = &g->fifo.tsg[ch->tsgid]; | ||
295 | vm = tsg->vm; | ||
296 | } else { | ||
297 | vm = ch->vm; | ||
298 | } | ||
299 | |||
300 | if (g->ops.gr.set_ctxsw_preemption_mode) { | ||
301 | err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class, | ||
302 | graphics_preempt_mode, | ||
303 | compute_preempt_mode); | ||
304 | if (err) { | ||
305 | nvgpu_err(g, "set_ctxsw_preemption_mode failed"); | ||
306 | return err; | ||
307 | } | ||
308 | } else { | ||
309 | err = -ENOSYS; | ||
310 | } | ||
311 | |||
312 | return err; | ||
313 | } | ||
314 | |||
315 | int vgpu_gr_gp10b_init_ctx_state(struct gk20a *g) | ||
316 | { | ||
317 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
318 | int err; | ||
319 | |||
320 | gk20a_dbg_fn(""); | ||
321 | |||
322 | err = vgpu_gr_init_ctx_state(g); | ||
323 | if (err) | ||
324 | return err; | ||
325 | |||
326 | g->gr.t18x.ctx_vars.preempt_image_size = | ||
327 | priv->constants.preempt_ctx_size; | ||
328 | if (!g->gr.t18x.ctx_vars.preempt_image_size) | ||
329 | return -EINVAL; | ||
330 | |||
331 | return 0; | ||
332 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h new file mode 100644 index 00000000..a11dab7d --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __VGPU_GR_GP10B_H__ | ||
18 | #define __VGPU_GR_GP10B_H__ | ||
19 | |||
20 | #include "gk20a/gk20a.h" | ||
21 | |||
22 | void vgpu_gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | ||
23 | struct gr_ctx_desc *gr_ctx); | ||
24 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, | ||
25 | struct gr_ctx_desc **__gr_ctx, | ||
26 | struct vm_gk20a *vm, | ||
27 | u32 class, | ||
28 | u32 flags); | ||
29 | int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, | ||
30 | struct gr_ctx_desc *gr_ctx, | ||
31 | struct vm_gk20a *vm, u32 class, | ||
32 | u32 graphics_preempt_mode, | ||
33 | u32 compute_preempt_mode); | ||
34 | int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | ||
35 | u32 graphics_preempt_mode, | ||
36 | u32 compute_preempt_mode); | ||
37 | int vgpu_gr_gp10b_init_ctx_state(struct gk20a *g); | ||
38 | |||
39 | #endif | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c new file mode 100644 index 00000000..da4ca10c --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c | |||
@@ -0,0 +1,624 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include "common/linux/vgpu/vgpu.h" | ||
18 | #include "common/linux/vgpu/fifo_vgpu.h" | ||
19 | #include "common/linux/vgpu/gr_vgpu.h" | ||
20 | #include "common/linux/vgpu/ltc_vgpu.h" | ||
21 | #include "common/linux/vgpu/mm_vgpu.h" | ||
22 | #include "common/linux/vgpu/dbg_vgpu.h" | ||
23 | #include "common/linux/vgpu/fecs_trace_vgpu.h" | ||
24 | #include "common/linux/vgpu/css_vgpu.h" | ||
25 | #include "gp10b/gp10b.h" | ||
26 | #include "gp10b/hal_gp10b.h" | ||
27 | #include "common/linux/vgpu/gm20b/vgpu_gr_gm20b.h" | ||
28 | #include "vgpu_gr_gp10b.h" | ||
29 | #include "vgpu_mm_gp10b.h" | ||
30 | |||
31 | #include "gk20a/bus_gk20a.h" | ||
32 | #include "gk20a/pramin_gk20a.h" | ||
33 | #include "gk20a/flcn_gk20a.h" | ||
34 | #include "gk20a/mc_gk20a.h" | ||
35 | #include "gk20a/fb_gk20a.h" | ||
36 | |||
37 | #include "gp10b/mc_gp10b.h" | ||
38 | #include "gp10b/ltc_gp10b.h" | ||
39 | #include "gp10b/mm_gp10b.h" | ||
40 | #include "gp10b/ce_gp10b.h" | ||
41 | #include "gp10b/fb_gp10b.h" | ||
42 | #include "gp10b/pmu_gp10b.h" | ||
43 | #include "gp10b/gr_ctx_gp10b.h" | ||
44 | #include "gp10b/fifo_gp10b.h" | ||
45 | #include "gp10b/gp10b_gating_reglist.h" | ||
46 | #include "gp10b/regops_gp10b.h" | ||
47 | #include "gp10b/therm_gp10b.h" | ||
48 | #include "gp10b/priv_ring_gp10b.h" | ||
49 | |||
50 | #include "gm20b/ltc_gm20b.h" | ||
51 | #include "gm20b/gr_gm20b.h" | ||
52 | #include "gm20b/fifo_gm20b.h" | ||
53 | #include "gm20b/acr_gm20b.h" | ||
54 | #include "gm20b/pmu_gm20b.h" | ||
55 | #include "gm20b/fb_gm20b.h" | ||
56 | #include "gm20b/mm_gm20b.h" | ||
57 | |||
58 | #include <nvgpu/enabled.h> | ||
59 | |||
60 | #include <nvgpu/hw/gp10b/hw_fuse_gp10b.h> | ||
61 | #include <nvgpu/hw/gp10b/hw_fifo_gp10b.h> | ||
62 | #include <nvgpu/hw/gp10b/hw_ram_gp10b.h> | ||
63 | #include <nvgpu/hw/gp10b/hw_top_gp10b.h> | ||
64 | #include <nvgpu/hw/gp10b/hw_pram_gp10b.h> | ||
65 | #include <nvgpu/hw/gp10b/hw_pwr_gp10b.h> | ||
66 | |||
67 | static const struct gpu_ops vgpu_gp10b_ops = { | ||
68 | .ltc = { | ||
69 | .determine_L2_size_bytes = vgpu_determine_L2_size_bytes, | ||
70 | .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, | ||
71 | .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, | ||
72 | .init_cbc = gm20b_ltc_init_cbc, | ||
73 | .init_fs_state = vgpu_ltc_init_fs_state, | ||
74 | .init_comptags = vgpu_ltc_init_comptags, | ||
75 | .cbc_ctrl = NULL, | ||
76 | .isr = gp10b_ltc_isr, | ||
77 | .cbc_fix_config = gm20b_ltc_cbc_fix_config, | ||
78 | .flush = gm20b_flush_ltc, | ||
79 | .set_enabled = gp10b_ltc_set_enabled, | ||
80 | }, | ||
81 | .ce2 = { | ||
82 | .isr_stall = gp10b_ce_isr, | ||
83 | .isr_nonstall = gp10b_ce_nonstall_isr, | ||
84 | .get_num_pce = vgpu_ce_get_num_pce, | ||
85 | }, | ||
86 | .gr = { | ||
87 | .get_patch_slots = gr_gk20a_get_patch_slots, | ||
88 | .init_gpc_mmu = gr_gm20b_init_gpc_mmu, | ||
89 | .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults, | ||
90 | .cb_size_default = gr_gp10b_cb_size_default, | ||
91 | .calc_global_ctx_buffer_size = | ||
92 | gr_gp10b_calc_global_ctx_buffer_size, | ||
93 | .commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb, | ||
94 | .commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb, | ||
95 | .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, | ||
96 | .commit_global_pagepool = gr_gp10b_commit_global_pagepool, | ||
97 | .handle_sw_method = gr_gp10b_handle_sw_method, | ||
98 | .set_alpha_circular_buffer_size = | ||
99 | gr_gp10b_set_alpha_circular_buffer_size, | ||
100 | .set_circular_buffer_size = gr_gp10b_set_circular_buffer_size, | ||
101 | .enable_hww_exceptions = gr_gk20a_enable_hww_exceptions, | ||
102 | .is_valid_class = gr_gp10b_is_valid_class, | ||
103 | .is_valid_gfx_class = gr_gp10b_is_valid_gfx_class, | ||
104 | .is_valid_compute_class = gr_gp10b_is_valid_compute_class, | ||
105 | .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs, | ||
106 | .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs, | ||
107 | .init_fs_state = vgpu_gm20b_init_fs_state, | ||
108 | .set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask, | ||
109 | .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, | ||
110 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, | ||
111 | .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, | ||
112 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, | ||
113 | .free_channel_ctx = vgpu_gr_free_channel_ctx, | ||
114 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, | ||
115 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, | ||
116 | .get_zcull_info = vgpu_gr_get_zcull_info, | ||
117 | .is_tpc_addr = gr_gm20b_is_tpc_addr, | ||
118 | .get_tpc_num = gr_gm20b_get_tpc_num, | ||
119 | .detect_sm_arch = vgpu_gr_detect_sm_arch, | ||
120 | .add_zbc_color = gr_gp10b_add_zbc_color, | ||
121 | .add_zbc_depth = gr_gp10b_add_zbc_depth, | ||
122 | .zbc_set_table = vgpu_gr_add_zbc, | ||
123 | .zbc_query_table = vgpu_gr_query_zbc, | ||
124 | .pmu_save_zbc = gk20a_pmu_save_zbc, | ||
125 | .add_zbc = gr_gk20a_add_zbc, | ||
126 | .pagepool_default_size = gr_gp10b_pagepool_default_size, | ||
127 | .init_ctx_state = vgpu_gr_gp10b_init_ctx_state, | ||
128 | .alloc_gr_ctx = vgpu_gr_gp10b_alloc_gr_ctx, | ||
129 | .free_gr_ctx = vgpu_gr_gp10b_free_gr_ctx, | ||
130 | .update_ctxsw_preemption_mode = | ||
131 | gr_gp10b_update_ctxsw_preemption_mode, | ||
132 | .dump_gr_regs = NULL, | ||
133 | .update_pc_sampling = gr_gm20b_update_pc_sampling, | ||
134 | .get_fbp_en_mask = vgpu_gr_get_fbp_en_mask, | ||
135 | .get_max_ltc_per_fbp = vgpu_gr_get_max_ltc_per_fbp, | ||
136 | .get_max_lts_per_ltc = vgpu_gr_get_max_lts_per_ltc, | ||
137 | .get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask, | ||
138 | .get_max_fbps_count = vgpu_gr_get_max_fbps_count, | ||
139 | .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info, | ||
140 | .wait_empty = gr_gp10b_wait_empty, | ||
141 | .init_cyclestats = vgpu_gr_gm20b_init_cyclestats, | ||
142 | .set_sm_debug_mode = vgpu_gr_set_sm_debug_mode, | ||
143 | .enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs, | ||
144 | .bpt_reg_info = gr_gm20b_bpt_reg_info, | ||
145 | .get_access_map = gr_gp10b_get_access_map, | ||
146 | .handle_fecs_error = gr_gp10b_handle_fecs_error, | ||
147 | .handle_sm_exception = gr_gp10b_handle_sm_exception, | ||
148 | .handle_tex_exception = gr_gp10b_handle_tex_exception, | ||
149 | .enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions, | ||
150 | .enable_exceptions = gk20a_gr_enable_exceptions, | ||
151 | .get_lrf_tex_ltc_dram_override = get_ecc_override_val, | ||
152 | .update_smpc_ctxsw_mode = vgpu_gr_update_smpc_ctxsw_mode, | ||
153 | .update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode, | ||
154 | .record_sm_error_state = gm20b_gr_record_sm_error_state, | ||
155 | .update_sm_error_state = gm20b_gr_update_sm_error_state, | ||
156 | .clear_sm_error_state = vgpu_gr_clear_sm_error_state, | ||
157 | .suspend_contexts = vgpu_gr_suspend_contexts, | ||
158 | .resume_contexts = vgpu_gr_resume_contexts, | ||
159 | .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, | ||
160 | .init_sm_id_table = gr_gk20a_init_sm_id_table, | ||
161 | .load_smid_config = gr_gp10b_load_smid_config, | ||
162 | .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering, | ||
163 | .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr, | ||
164 | .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr, | ||
165 | .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr, | ||
166 | .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr, | ||
167 | .setup_rop_mapping = gr_gk20a_setup_rop_mapping, | ||
168 | .program_zcull_mapping = gr_gk20a_program_zcull_mapping, | ||
169 | .commit_global_timeslice = gr_gk20a_commit_global_timeslice, | ||
170 | .commit_inst = vgpu_gr_commit_inst, | ||
171 | .write_zcull_ptr = gr_gk20a_write_zcull_ptr, | ||
172 | .write_pm_ptr = gr_gk20a_write_pm_ptr, | ||
173 | .init_elcg_mode = gr_gk20a_init_elcg_mode, | ||
174 | .load_tpc_mask = gr_gm20b_load_tpc_mask, | ||
175 | .inval_icache = gr_gk20a_inval_icache, | ||
176 | .trigger_suspend = gr_gk20a_trigger_suspend, | ||
177 | .wait_for_pause = gr_gk20a_wait_for_pause, | ||
178 | .resume_from_pause = gr_gk20a_resume_from_pause, | ||
179 | .clear_sm_errors = gr_gk20a_clear_sm_errors, | ||
180 | .tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions, | ||
181 | .get_esr_sm_sel = gk20a_gr_get_esr_sm_sel, | ||
182 | .sm_debugger_attached = gk20a_gr_sm_debugger_attached, | ||
183 | .suspend_single_sm = gk20a_gr_suspend_single_sm, | ||
184 | .suspend_all_sms = gk20a_gr_suspend_all_sms, | ||
185 | .resume_single_sm = gk20a_gr_resume_single_sm, | ||
186 | .resume_all_sms = gk20a_gr_resume_all_sms, | ||
187 | .get_sm_hww_warp_esr = gp10b_gr_get_sm_hww_warp_esr, | ||
188 | .get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr, | ||
189 | .get_sm_no_lock_down_hww_global_esr_mask = | ||
190 | gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask, | ||
191 | .lock_down_sm = gk20a_gr_lock_down_sm, | ||
192 | .wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down, | ||
193 | .clear_sm_hww = gm20b_gr_clear_sm_hww, | ||
194 | .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf, | ||
195 | .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs, | ||
196 | .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, | ||
197 | .set_boosted_ctx = NULL, | ||
198 | .set_preemption_mode = vgpu_gr_gp10b_set_preemption_mode, | ||
199 | .set_czf_bypass = gr_gp10b_set_czf_bypass, | ||
200 | .init_czf_bypass = gr_gp10b_init_czf_bypass, | ||
201 | .pre_process_sm_exception = gr_gp10b_pre_process_sm_exception, | ||
202 | .set_preemption_buffer_va = gr_gp10b_set_preemption_buffer_va, | ||
203 | .init_preemption_state = gr_gp10b_init_preemption_state, | ||
204 | .update_boosted_ctx = NULL, | ||
205 | .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, | ||
206 | .create_gr_sysfs = gr_gp10b_create_sysfs, | ||
207 | .set_ctxsw_preemption_mode = | ||
208 | vgpu_gr_gp10b_set_ctxsw_preemption_mode, | ||
209 | .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data, | ||
210 | }, | ||
211 | .fb = { | ||
212 | .reset = fb_gk20a_reset, | ||
213 | .init_hw = gk20a_fb_init_hw, | ||
214 | .init_fs_state = fb_gm20b_init_fs_state, | ||
215 | .set_mmu_page_size = gm20b_fb_set_mmu_page_size, | ||
216 | .set_use_full_comp_tag_line = | ||
217 | gm20b_fb_set_use_full_comp_tag_line, | ||
218 | .compression_page_size = gp10b_fb_compression_page_size, | ||
219 | .compressible_page_size = gp10b_fb_compressible_page_size, | ||
220 | .vpr_info_fetch = gm20b_fb_vpr_info_fetch, | ||
221 | .dump_vpr_wpr_info = gm20b_fb_dump_vpr_wpr_info, | ||
222 | .read_wpr_info = gm20b_fb_read_wpr_info, | ||
223 | .is_debug_mode_enabled = NULL, | ||
224 | .set_debug_mode = vgpu_mm_mmu_set_debug_mode, | ||
225 | .tlb_invalidate = vgpu_mm_tlb_invalidate, | ||
226 | }, | ||
227 | .clock_gating = { | ||
228 | .slcg_bus_load_gating_prod = | ||
229 | gp10b_slcg_bus_load_gating_prod, | ||
230 | .slcg_ce2_load_gating_prod = | ||
231 | gp10b_slcg_ce2_load_gating_prod, | ||
232 | .slcg_chiplet_load_gating_prod = | ||
233 | gp10b_slcg_chiplet_load_gating_prod, | ||
234 | .slcg_ctxsw_firmware_load_gating_prod = | ||
235 | gp10b_slcg_ctxsw_firmware_load_gating_prod, | ||
236 | .slcg_fb_load_gating_prod = | ||
237 | gp10b_slcg_fb_load_gating_prod, | ||
238 | .slcg_fifo_load_gating_prod = | ||
239 | gp10b_slcg_fifo_load_gating_prod, | ||
240 | .slcg_gr_load_gating_prod = | ||
241 | gr_gp10b_slcg_gr_load_gating_prod, | ||
242 | .slcg_ltc_load_gating_prod = | ||
243 | ltc_gp10b_slcg_ltc_load_gating_prod, | ||
244 | .slcg_perf_load_gating_prod = | ||
245 | gp10b_slcg_perf_load_gating_prod, | ||
246 | .slcg_priring_load_gating_prod = | ||
247 | gp10b_slcg_priring_load_gating_prod, | ||
248 | .slcg_pmu_load_gating_prod = | ||
249 | gp10b_slcg_pmu_load_gating_prod, | ||
250 | .slcg_therm_load_gating_prod = | ||
251 | gp10b_slcg_therm_load_gating_prod, | ||
252 | .slcg_xbar_load_gating_prod = | ||
253 | gp10b_slcg_xbar_load_gating_prod, | ||
254 | .blcg_bus_load_gating_prod = | ||
255 | gp10b_blcg_bus_load_gating_prod, | ||
256 | .blcg_ce_load_gating_prod = | ||
257 | gp10b_blcg_ce_load_gating_prod, | ||
258 | .blcg_ctxsw_firmware_load_gating_prod = | ||
259 | gp10b_blcg_ctxsw_firmware_load_gating_prod, | ||
260 | .blcg_fb_load_gating_prod = | ||
261 | gp10b_blcg_fb_load_gating_prod, | ||
262 | .blcg_fifo_load_gating_prod = | ||
263 | gp10b_blcg_fifo_load_gating_prod, | ||
264 | .blcg_gr_load_gating_prod = | ||
265 | gp10b_blcg_gr_load_gating_prod, | ||
266 | .blcg_ltc_load_gating_prod = | ||
267 | gp10b_blcg_ltc_load_gating_prod, | ||
268 | .blcg_pwr_csb_load_gating_prod = | ||
269 | gp10b_blcg_pwr_csb_load_gating_prod, | ||
270 | .blcg_pmu_load_gating_prod = | ||
271 | gp10b_blcg_pmu_load_gating_prod, | ||
272 | .blcg_xbar_load_gating_prod = | ||
273 | gp10b_blcg_xbar_load_gating_prod, | ||
274 | .pg_gr_load_gating_prod = | ||
275 | gr_gp10b_pg_gr_load_gating_prod, | ||
276 | }, | ||
277 | .fifo = { | ||
278 | .init_fifo_setup_hw = vgpu_init_fifo_setup_hw, | ||
279 | .bind_channel = vgpu_channel_bind, | ||
280 | .unbind_channel = vgpu_channel_unbind, | ||
281 | .disable_channel = vgpu_channel_disable, | ||
282 | .enable_channel = vgpu_channel_enable, | ||
283 | .alloc_inst = vgpu_channel_alloc_inst, | ||
284 | .free_inst = vgpu_channel_free_inst, | ||
285 | .setup_ramfc = vgpu_channel_setup_ramfc, | ||
286 | .channel_set_timeslice = vgpu_channel_set_timeslice, | ||
287 | .default_timeslice_us = vgpu_fifo_default_timeslice_us, | ||
288 | .setup_userd = gk20a_fifo_setup_userd, | ||
289 | .userd_gp_get = gk20a_fifo_userd_gp_get, | ||
290 | .userd_gp_put = gk20a_fifo_userd_gp_put, | ||
291 | .userd_pb_get = gk20a_fifo_userd_pb_get, | ||
292 | .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val, | ||
293 | .preempt_channel = vgpu_fifo_preempt_channel, | ||
294 | .preempt_tsg = vgpu_fifo_preempt_tsg, | ||
295 | .enable_tsg = vgpu_enable_tsg, | ||
296 | .disable_tsg = gk20a_disable_tsg, | ||
297 | .tsg_verify_channel_status = NULL, | ||
298 | .tsg_verify_status_ctx_reload = NULL, | ||
299 | .reschedule_runlist = NULL, | ||
300 | .update_runlist = vgpu_fifo_update_runlist, | ||
301 | .trigger_mmu_fault = gm20b_fifo_trigger_mmu_fault, | ||
302 | .get_mmu_fault_info = gp10b_fifo_get_mmu_fault_info, | ||
303 | .wait_engine_idle = vgpu_fifo_wait_engine_idle, | ||
304 | .get_num_fifos = gm20b_fifo_get_num_fifos, | ||
305 | .get_pbdma_signature = gp10b_fifo_get_pbdma_signature, | ||
306 | .set_runlist_interleave = vgpu_fifo_set_runlist_interleave, | ||
307 | .tsg_set_timeslice = vgpu_tsg_set_timeslice, | ||
308 | .tsg_open = vgpu_tsg_open, | ||
309 | .force_reset_ch = vgpu_fifo_force_reset_ch, | ||
310 | .engine_enum_from_type = gp10b_fifo_engine_enum_from_type, | ||
311 | .device_info_data_parse = gp10b_device_info_data_parse, | ||
312 | .eng_runlist_base_size = fifo_eng_runlist_base__size_1_v, | ||
313 | .init_engine_info = vgpu_fifo_init_engine_info, | ||
314 | .runlist_entry_size = ram_rl_entry_size_v, | ||
315 | .get_tsg_runlist_entry = gk20a_get_tsg_runlist_entry, | ||
316 | .get_ch_runlist_entry = gk20a_get_ch_runlist_entry, | ||
317 | .is_fault_engine_subid_gpc = gk20a_is_fault_engine_subid_gpc, | ||
318 | .dump_pbdma_status = gk20a_dump_pbdma_status, | ||
319 | .dump_eng_status = gk20a_dump_eng_status, | ||
320 | .dump_channel_status_ramfc = gk20a_dump_channel_status_ramfc, | ||
321 | .intr_0_error_mask = gk20a_fifo_intr_0_error_mask, | ||
322 | .is_preempt_pending = gk20a_fifo_is_preempt_pending, | ||
323 | .init_pbdma_intr_descs = gp10b_fifo_init_pbdma_intr_descs, | ||
324 | .reset_enable_hw = gk20a_init_fifo_reset_enable_hw, | ||
325 | .teardown_ch_tsg = gk20a_fifo_teardown_ch_tsg, | ||
326 | .handle_sched_error = gk20a_fifo_handle_sched_error, | ||
327 | .handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0, | ||
328 | .handle_pbdma_intr_1 = gk20a_fifo_handle_pbdma_intr_1, | ||
329 | .tsg_bind_channel = vgpu_tsg_bind_channel, | ||
330 | .tsg_unbind_channel = vgpu_tsg_unbind_channel, | ||
331 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
332 | .alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf, | ||
333 | .free_syncpt_buf = gk20a_fifo_free_syncpt_buf, | ||
334 | .add_syncpt_wait_cmd = gk20a_fifo_add_syncpt_wait_cmd, | ||
335 | .get_syncpt_wait_cmd_size = gk20a_fifo_get_syncpt_wait_cmd_size, | ||
336 | .add_syncpt_incr_cmd = gk20a_fifo_add_syncpt_incr_cmd, | ||
337 | .get_syncpt_incr_cmd_size = gk20a_fifo_get_syncpt_incr_cmd_size, | ||
338 | #endif | ||
339 | .resetup_ramfc = NULL, | ||
340 | .device_info_fault_id = top_device_info_data_fault_id_enum_v, | ||
341 | }, | ||
342 | .gr_ctx = { | ||
343 | .get_netlist_name = gr_gp10b_get_netlist_name, | ||
344 | .is_fw_defined = gr_gp10b_is_firmware_defined, | ||
345 | }, | ||
346 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
347 | .fecs_trace = { | ||
348 | .alloc_user_buffer = vgpu_alloc_user_buffer, | ||
349 | .free_user_buffer = vgpu_free_user_buffer, | ||
350 | .mmap_user_buffer = vgpu_mmap_user_buffer, | ||
351 | .init = vgpu_fecs_trace_init, | ||
352 | .deinit = vgpu_fecs_trace_deinit, | ||
353 | .enable = vgpu_fecs_trace_enable, | ||
354 | .disable = vgpu_fecs_trace_disable, | ||
355 | .is_enabled = vgpu_fecs_trace_is_enabled, | ||
356 | .reset = NULL, | ||
357 | .flush = NULL, | ||
358 | .poll = vgpu_fecs_trace_poll, | ||
359 | .bind_channel = NULL, | ||
360 | .unbind_channel = NULL, | ||
361 | .max_entries = vgpu_fecs_trace_max_entries, | ||
362 | .set_filter = vgpu_fecs_trace_set_filter, | ||
363 | }, | ||
364 | #endif /* CONFIG_GK20A_CTXSW_TRACE */ | ||
365 | .mm = { | ||
366 | /* FIXME: add support for sparse mappings */ | ||
367 | .support_sparse = NULL, | ||
368 | .gmmu_map = vgpu_gp10b_locked_gmmu_map, | ||
369 | .gmmu_unmap = vgpu_locked_gmmu_unmap, | ||
370 | .vm_bind_channel = vgpu_vm_bind_channel, | ||
371 | .fb_flush = vgpu_mm_fb_flush, | ||
372 | .l2_invalidate = vgpu_mm_l2_invalidate, | ||
373 | .l2_flush = vgpu_mm_l2_flush, | ||
374 | .cbc_clean = gk20a_mm_cbc_clean, | ||
375 | .set_big_page_size = gm20b_mm_set_big_page_size, | ||
376 | .get_big_page_sizes = gm20b_mm_get_big_page_sizes, | ||
377 | .get_default_big_page_size = gp10b_mm_get_default_big_page_size, | ||
378 | .gpu_phys_addr = gm20b_gpu_phys_addr, | ||
379 | .get_iommu_bit = gk20a_mm_get_iommu_bit, | ||
380 | .get_mmu_levels = gp10b_mm_get_mmu_levels, | ||
381 | .init_pdb = gp10b_mm_init_pdb, | ||
382 | .init_mm_setup_hw = vgpu_gp10b_init_mm_setup_hw, | ||
383 | .is_bar1_supported = gm20b_mm_is_bar1_supported, | ||
384 | .init_inst_block = gk20a_init_inst_block, | ||
385 | .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, | ||
386 | .init_bar2_vm = gb10b_init_bar2_vm, | ||
387 | .init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup, | ||
388 | .remove_bar2_vm = gp10b_remove_bar2_vm, | ||
389 | .get_kind_invalid = gm20b_get_kind_invalid, | ||
390 | .get_kind_pitch = gm20b_get_kind_pitch, | ||
391 | }, | ||
392 | .pramin = { | ||
393 | .enter = gk20a_pramin_enter, | ||
394 | .exit = gk20a_pramin_exit, | ||
395 | .data032_r = pram_data032_r, | ||
396 | }, | ||
397 | .therm = { | ||
398 | .init_therm_setup_hw = gp10b_init_therm_setup_hw, | ||
399 | .elcg_init_idle_filters = gp10b_elcg_init_idle_filters, | ||
400 | }, | ||
401 | .pmu = { | ||
402 | .pmu_setup_elpg = gp10b_pmu_setup_elpg, | ||
403 | .pmu_get_queue_head = pwr_pmu_queue_head_r, | ||
404 | .pmu_get_queue_head_size = pwr_pmu_queue_head__size_1_v, | ||
405 | .pmu_get_queue_tail = pwr_pmu_queue_tail_r, | ||
406 | .pmu_get_queue_tail_size = pwr_pmu_queue_tail__size_1_v, | ||
407 | .pmu_queue_head = gk20a_pmu_queue_head, | ||
408 | .pmu_queue_tail = gk20a_pmu_queue_tail, | ||
409 | .pmu_msgq_tail = gk20a_pmu_msgq_tail, | ||
410 | .pmu_mutex_size = pwr_pmu_mutex__size_1_v, | ||
411 | .pmu_mutex_acquire = gk20a_pmu_mutex_acquire, | ||
412 | .pmu_mutex_release = gk20a_pmu_mutex_release, | ||
413 | .write_dmatrfbase = gp10b_write_dmatrfbase, | ||
414 | .pmu_elpg_statistics = gp10b_pmu_elpg_statistics, | ||
415 | .pmu_pg_init_param = gp10b_pg_gr_init, | ||
416 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, | ||
417 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, | ||
418 | .dump_secure_fuses = pmu_dump_security_fuses_gp10b, | ||
419 | .reset_engine = gk20a_pmu_engine_reset, | ||
420 | .is_engine_in_reset = gk20a_pmu_is_engine_in_reset, | ||
421 | }, | ||
422 | .regops = { | ||
423 | .get_global_whitelist_ranges = | ||
424 | gp10b_get_global_whitelist_ranges, | ||
425 | .get_global_whitelist_ranges_count = | ||
426 | gp10b_get_global_whitelist_ranges_count, | ||
427 | .get_context_whitelist_ranges = | ||
428 | gp10b_get_context_whitelist_ranges, | ||
429 | .get_context_whitelist_ranges_count = | ||
430 | gp10b_get_context_whitelist_ranges_count, | ||
431 | .get_runcontrol_whitelist = gp10b_get_runcontrol_whitelist, | ||
432 | .get_runcontrol_whitelist_count = | ||
433 | gp10b_get_runcontrol_whitelist_count, | ||
434 | .get_runcontrol_whitelist_ranges = | ||
435 | gp10b_get_runcontrol_whitelist_ranges, | ||
436 | .get_runcontrol_whitelist_ranges_count = | ||
437 | gp10b_get_runcontrol_whitelist_ranges_count, | ||
438 | .get_qctl_whitelist = gp10b_get_qctl_whitelist, | ||
439 | .get_qctl_whitelist_count = gp10b_get_qctl_whitelist_count, | ||
440 | .get_qctl_whitelist_ranges = gp10b_get_qctl_whitelist_ranges, | ||
441 | .get_qctl_whitelist_ranges_count = | ||
442 | gp10b_get_qctl_whitelist_ranges_count, | ||
443 | .apply_smpc_war = gp10b_apply_smpc_war, | ||
444 | }, | ||
445 | .mc = { | ||
446 | .intr_enable = mc_gp10b_intr_enable, | ||
447 | .intr_unit_config = mc_gp10b_intr_unit_config, | ||
448 | .isr_stall = mc_gp10b_isr_stall, | ||
449 | .intr_stall = mc_gp10b_intr_stall, | ||
450 | .intr_stall_pause = mc_gp10b_intr_stall_pause, | ||
451 | .intr_stall_resume = mc_gp10b_intr_stall_resume, | ||
452 | .intr_nonstall = mc_gp10b_intr_nonstall, | ||
453 | .intr_nonstall_pause = mc_gp10b_intr_nonstall_pause, | ||
454 | .intr_nonstall_resume = mc_gp10b_intr_nonstall_resume, | ||
455 | .enable = gk20a_mc_enable, | ||
456 | .disable = gk20a_mc_disable, | ||
457 | .reset = gk20a_mc_reset, | ||
458 | .boot_0 = gk20a_mc_boot_0, | ||
459 | .is_intr1_pending = mc_gp10b_is_intr1_pending, | ||
460 | }, | ||
461 | .debug = { | ||
462 | .show_dump = NULL, | ||
463 | }, | ||
464 | .dbg_session_ops = { | ||
465 | .exec_reg_ops = vgpu_exec_regops, | ||
466 | .dbg_set_powergate = vgpu_dbg_set_powergate, | ||
467 | .check_and_set_global_reservation = | ||
468 | vgpu_check_and_set_global_reservation, | ||
469 | .check_and_set_context_reservation = | ||
470 | vgpu_check_and_set_context_reservation, | ||
471 | .release_profiler_reservation = | ||
472 | vgpu_release_profiler_reservation, | ||
473 | .perfbuffer_enable = vgpu_perfbuffer_enable, | ||
474 | .perfbuffer_disable = vgpu_perfbuffer_disable, | ||
475 | }, | ||
476 | .bus = { | ||
477 | .init_hw = gk20a_bus_init_hw, | ||
478 | .isr = gk20a_bus_isr, | ||
479 | .read_ptimer = vgpu_read_ptimer, | ||
480 | .get_timestamps_zipper = vgpu_get_timestamps_zipper, | ||
481 | .bar1_bind = gk20a_bus_bar1_bind, | ||
482 | }, | ||
483 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
484 | .css = { | ||
485 | .enable_snapshot = vgpu_css_enable_snapshot_buffer, | ||
486 | .disable_snapshot = vgpu_css_release_snapshot_buffer, | ||
487 | .check_data_available = vgpu_css_flush_snapshots, | ||
488 | .detach_snapshot = vgpu_css_detach, | ||
489 | .set_handled_snapshots = NULL, | ||
490 | .allocate_perfmon_ids = NULL, | ||
491 | .release_perfmon_ids = NULL, | ||
492 | }, | ||
493 | #endif | ||
494 | .falcon = { | ||
495 | .falcon_hal_sw_init = gk20a_falcon_hal_sw_init, | ||
496 | }, | ||
497 | .priv_ring = { | ||
498 | .isr = gp10b_priv_ring_isr, | ||
499 | }, | ||
500 | .chip_init_gpu_characteristics = vgpu_init_gpu_characteristics, | ||
501 | .get_litter_value = gp10b_get_litter_value, | ||
502 | }; | ||
503 | |||
504 | int vgpu_gp10b_init_hal(struct gk20a *g) | ||
505 | { | ||
506 | struct gpu_ops *gops = &g->ops; | ||
507 | u32 val; | ||
508 | |||
509 | gops->ltc = vgpu_gp10b_ops.ltc; | ||
510 | gops->ce2 = vgpu_gp10b_ops.ce2; | ||
511 | gops->gr = vgpu_gp10b_ops.gr; | ||
512 | gops->fb = vgpu_gp10b_ops.fb; | ||
513 | gops->clock_gating = vgpu_gp10b_ops.clock_gating; | ||
514 | gops->fifo = vgpu_gp10b_ops.fifo; | ||
515 | gops->gr_ctx = vgpu_gp10b_ops.gr_ctx; | ||
516 | gops->fecs_trace = vgpu_gp10b_ops.fecs_trace; | ||
517 | gops->mm = vgpu_gp10b_ops.mm; | ||
518 | gops->pramin = vgpu_gp10b_ops.pramin; | ||
519 | gops->therm = vgpu_gp10b_ops.therm; | ||
520 | gops->pmu = vgpu_gp10b_ops.pmu; | ||
521 | gops->regops = vgpu_gp10b_ops.regops; | ||
522 | gops->mc = vgpu_gp10b_ops.mc; | ||
523 | gops->debug = vgpu_gp10b_ops.debug; | ||
524 | gops->dbg_session_ops = vgpu_gp10b_ops.dbg_session_ops; | ||
525 | gops->bus = vgpu_gp10b_ops.bus; | ||
526 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
527 | gops->css = vgpu_gp10b_ops.css; | ||
528 | #endif | ||
529 | gops->falcon = vgpu_gp10b_ops.falcon; | ||
530 | |||
531 | gops->priv_ring = vgpu_gp10b_ops.priv_ring; | ||
532 | |||
533 | /* Lone Functions */ | ||
534 | gops->chip_init_gpu_characteristics = | ||
535 | vgpu_gp10b_ops.chip_init_gpu_characteristics; | ||
536 | gops->get_litter_value = vgpu_gp10b_ops.get_litter_value; | ||
537 | |||
538 | __nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true); | ||
539 | __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false); | ||
540 | |||
541 | #ifdef CONFIG_TEGRA_ACR | ||
542 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { | ||
543 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false); | ||
544 | __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, false); | ||
545 | } else if (g->is_virtual) { | ||
546 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true); | ||
547 | __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, true); | ||
548 | } else { | ||
549 | val = gk20a_readl(g, fuse_opt_priv_sec_en_r()); | ||
550 | if (val) { | ||
551 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true); | ||
552 | __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, true); | ||
553 | } else { | ||
554 | gk20a_dbg_info("priv security is disabled in HW"); | ||
555 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false); | ||
556 | __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, false); | ||
557 | } | ||
558 | } | ||
559 | #else | ||
560 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { | ||
561 | gk20a_dbg_info("running simulator with PRIV security disabled"); | ||
562 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false); | ||
563 | __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, false); | ||
564 | } else { | ||
565 | val = gk20a_readl(g, fuse_opt_priv_sec_en_r()); | ||
566 | if (val) { | ||
567 | gk20a_dbg_info("priv security is not supported but enabled"); | ||
568 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true); | ||
569 | __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, true); | ||
570 | return -EPERM; | ||
571 | } else { | ||
572 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false); | ||
573 | __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, false); | ||
574 | } | ||
575 | } | ||
576 | #endif | ||
577 | |||
578 | /* priv security dependent ops */ | ||
579 | if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { | ||
580 | /* Add in ops from gm20b acr */ | ||
581 | gops->pmu.is_pmu_supported = gm20b_is_pmu_supported, | ||
582 | gops->pmu.prepare_ucode = prepare_ucode_blob, | ||
583 | gops->pmu.pmu_setup_hw_and_bootstrap = gm20b_bootstrap_hs_flcn, | ||
584 | gops->pmu.is_lazy_bootstrap = gm20b_is_lazy_bootstrap, | ||
585 | gops->pmu.is_priv_load = gm20b_is_priv_load, | ||
586 | gops->pmu.get_wpr = gm20b_wpr_info, | ||
587 | gops->pmu.alloc_blob_space = gm20b_alloc_blob_space, | ||
588 | gops->pmu.pmu_populate_loader_cfg = | ||
589 | gm20b_pmu_populate_loader_cfg, | ||
590 | gops->pmu.flcn_populate_bl_dmem_desc = | ||
591 | gm20b_flcn_populate_bl_dmem_desc, | ||
592 | gops->pmu.falcon_wait_for_halt = pmu_wait_for_halt, | ||
593 | gops->pmu.falcon_clear_halt_interrupt_status = | ||
594 | clear_halt_interrupt_status, | ||
595 | gops->pmu.init_falcon_setup_hw = gm20b_init_pmu_setup_hw1, | ||
596 | |||
597 | gops->pmu.init_wpr_region = gm20b_pmu_init_acr; | ||
598 | gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode; | ||
599 | gops->pmu.is_lazy_bootstrap = gp10b_is_lazy_bootstrap; | ||
600 | gops->pmu.is_priv_load = gp10b_is_priv_load; | ||
601 | |||
602 | gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode; | ||
603 | } else { | ||
604 | /* Inherit from gk20a */ | ||
605 | gops->pmu.is_pmu_supported = gk20a_is_pmu_supported, | ||
606 | gops->pmu.prepare_ucode = nvgpu_pmu_prepare_ns_ucode_blob, | ||
607 | gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1, | ||
608 | gops->pmu.pmu_nsbootstrap = pmu_bootstrap, | ||
609 | |||
610 | gops->pmu.load_lsfalcon_ucode = NULL; | ||
611 | gops->pmu.init_wpr_region = NULL; | ||
612 | gops->pmu.pmu_setup_hw_and_bootstrap = gp10b_init_pmu_setup_hw1; | ||
613 | |||
614 | gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; | ||
615 | } | ||
616 | |||
617 | __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); | ||
618 | g->pmu_lsf_pmu_wpr_init_done = 0; | ||
619 | g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; | ||
620 | |||
621 | g->name = "gp10b"; | ||
622 | |||
623 | return 0; | ||
624 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.c new file mode 100644 index 00000000..9eb140a3 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.c | |||
@@ -0,0 +1,197 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Memory Management | ||
3 | * | ||
4 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <uapi/linux/nvgpu.h> | ||
20 | |||
21 | #include "common/linux/vgpu/vgpu.h" | ||
22 | #include "vgpu_mm_gp10b.h" | ||
23 | #include "gk20a/mm_gk20a.h" | ||
24 | |||
25 | #include <nvgpu/bug.h> | ||
26 | |||
/*
 * MM "hardware" setup for the virtualized GPU.
 *
 * The guest has no real MMU hardware to program, so this only records two
 * facts about the vGPU configuration: all GPU accesses bypass the SMMU and
 * big pages are disabled.  Always returns 0.
 */
int vgpu_gp10b_init_mm_setup_hw(struct gk20a *g)
{
	g->mm.bypass_smmu = true;
	g->mm.disable_bigpage = true;
	return 0;
}
33 | |||
34 | static inline int add_mem_desc(struct tegra_vgpu_mem_desc *mem_desc, | ||
35 | u64 addr, u64 size, size_t *oob_size) | ||
36 | { | ||
37 | if (*oob_size < sizeof(*mem_desc)) | ||
38 | return -ENOMEM; | ||
39 | |||
40 | mem_desc->addr = addr; | ||
41 | mem_desc->length = size; | ||
42 | *oob_size -= sizeof(*mem_desc); | ||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, | ||
47 | u64 map_offset, | ||
48 | struct nvgpu_sgt *sgt, | ||
49 | u64 buffer_offset, | ||
50 | u64 size, | ||
51 | int pgsz_idx, | ||
52 | u8 kind_v, | ||
53 | u32 ctag_offset, | ||
54 | u32 flags, | ||
55 | int rw_flag, | ||
56 | bool clear_ctags, | ||
57 | bool sparse, | ||
58 | bool priv, | ||
59 | struct vm_gk20a_mapping_batch *batch, | ||
60 | enum nvgpu_aperture aperture) | ||
61 | { | ||
62 | int err = 0; | ||
63 | struct gk20a *g = gk20a_from_vm(vm); | ||
64 | struct tegra_vgpu_cmd_msg msg; | ||
65 | struct tegra_vgpu_as_map_ex_params *p = &msg.params.as_map_ex; | ||
66 | struct tegra_vgpu_mem_desc *mem_desc; | ||
67 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | ||
68 | u64 buffer_size = PAGE_ALIGN(size); | ||
69 | u64 space_to_skip = buffer_offset; | ||
70 | u32 mem_desc_count = 0, i; | ||
71 | void *handle = NULL; | ||
72 | size_t oob_size; | ||
73 | u8 prot; | ||
74 | void *sgl; | ||
75 | |||
76 | gk20a_dbg_fn(""); | ||
77 | |||
78 | /* FIXME: add support for sparse mappings */ | ||
79 | |||
80 | if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu)) | ||
81 | return 0; | ||
82 | |||
83 | if (space_to_skip & (page_size - 1)) | ||
84 | return 0; | ||
85 | |||
86 | memset(&msg, 0, sizeof(msg)); | ||
87 | |||
88 | /* Allocate (or validate when map_offset != 0) the virtual address. */ | ||
89 | if (!map_offset) { | ||
90 | map_offset = __nvgpu_vm_alloc_va(vm, size, pgsz_idx); | ||
91 | if (!map_offset) { | ||
92 | nvgpu_err(g, "failed to allocate va space"); | ||
93 | err = -ENOMEM; | ||
94 | goto fail; | ||
95 | } | ||
96 | } | ||
97 | |||
98 | handle = tegra_gr_comm_oob_get_ptr(TEGRA_GR_COMM_CTX_CLIENT, | ||
99 | tegra_gr_comm_get_server_vmid(), | ||
100 | TEGRA_VGPU_QUEUE_CMD, | ||
101 | (void **)&mem_desc, &oob_size); | ||
102 | if (!handle) { | ||
103 | err = -EINVAL; | ||
104 | goto fail; | ||
105 | } | ||
106 | sgl = sgt->sgl; | ||
107 | while (sgl) { | ||
108 | u64 phys_addr; | ||
109 | u64 chunk_length; | ||
110 | |||
111 | /* | ||
112 | * Cut out sgl ents for space_to_skip. | ||
113 | */ | ||
114 | if (space_to_skip && | ||
115 | space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) { | ||
116 | space_to_skip -= nvgpu_sgt_get_length(sgt, sgl); | ||
117 | sgl = nvgpu_sgt_get_next(sgt, sgl); | ||
118 | continue; | ||
119 | } | ||
120 | |||
121 | phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip; | ||
122 | chunk_length = min(size, | ||
123 | nvgpu_sgt_get_length(sgt, sgl) - space_to_skip); | ||
124 | |||
125 | if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr, | ||
126 | chunk_length, &oob_size)) { | ||
127 | err = -ENOMEM; | ||
128 | goto fail; | ||
129 | } | ||
130 | |||
131 | space_to_skip = 0; | ||
132 | size -= chunk_length; | ||
133 | sgl = nvgpu_sgt_get_next(sgt, sgl); | ||
134 | |||
135 | if (size == 0) | ||
136 | break; | ||
137 | } | ||
138 | |||
139 | if (rw_flag == gk20a_mem_flag_read_only) | ||
140 | prot = TEGRA_VGPU_MAP_PROT_READ_ONLY; | ||
141 | else if (rw_flag == gk20a_mem_flag_write_only) | ||
142 | prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY; | ||
143 | else | ||
144 | prot = TEGRA_VGPU_MAP_PROT_NONE; | ||
145 | |||
146 | if (pgsz_idx == gmmu_page_size_kernel) { | ||
147 | if (page_size == vm->gmmu_page_sizes[gmmu_page_size_small]) { | ||
148 | pgsz_idx = gmmu_page_size_small; | ||
149 | } else if (page_size == | ||
150 | vm->gmmu_page_sizes[gmmu_page_size_big]) { | ||
151 | pgsz_idx = gmmu_page_size_big; | ||
152 | } else { | ||
153 | nvgpu_err(g, "invalid kernel page size %d", | ||
154 | page_size); | ||
155 | goto fail; | ||
156 | } | ||
157 | } | ||
158 | |||
159 | msg.cmd = TEGRA_VGPU_CMD_AS_MAP_EX; | ||
160 | msg.handle = vgpu_get_handle(g); | ||
161 | p->handle = vm->handle; | ||
162 | p->gpu_va = map_offset; | ||
163 | p->size = buffer_size; | ||
164 | p->mem_desc_count = mem_desc_count; | ||
165 | p->pgsz_idx = pgsz_idx; | ||
166 | p->iova = 0; | ||
167 | p->kind = kind_v; | ||
168 | p->cacheable = (flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE) ? 1 : 0; | ||
169 | p->prot = prot; | ||
170 | p->ctag_offset = ctag_offset; | ||
171 | p->clear_ctags = clear_ctags; | ||
172 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
173 | if (err || msg.ret) | ||
174 | goto fail; | ||
175 | |||
176 | /* TLB invalidate handled on server side */ | ||
177 | |||
178 | tegra_gr_comm_oob_put_ptr(handle); | ||
179 | return map_offset; | ||
180 | fail: | ||
181 | if (handle) | ||
182 | tegra_gr_comm_oob_put_ptr(handle); | ||
183 | nvgpu_err(g, "Failed: err=%d, msg.ret=%d", err, msg.ret); | ||
184 | nvgpu_err(g, | ||
185 | " Map: %-5s GPU virt %#-12llx +%#-9llx " | ||
186 | "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " | ||
187 | "kind=%#02x APT=%-6s", | ||
188 | vm->name, map_offset, buffer_size, buffer_offset, | ||
189 | vm->gmmu_page_sizes[pgsz_idx] >> 10, | ||
190 | nvgpu_gmmu_perm_str(rw_flag), | ||
191 | kind_v, "SYSMEM"); | ||
192 | for (i = 0; i < mem_desc_count; i++) | ||
193 | nvgpu_err(g, " > 0x%010llx + 0x%llx", | ||
194 | mem_desc[i].addr, mem_desc[i].length); | ||
195 | |||
196 | return 0; | ||
197 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.h new file mode 100644 index 00000000..0a477dd0 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.h | |||
@@ -0,0 +1,39 @@ | |||
/*
 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __VGPU_MM_GP10B_H__
#define __VGPU_MM_GP10B_H__

#include "gk20a/gk20a.h"

/*
 * GMMU map hook for gp10b vGPU: forwards the mapping request to the RM
 * server.  Returns the GPU VA on success, 0 on failure.  Called with the
 * VM lock held.
 */
u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
			u64 map_offset,
			struct nvgpu_sgt *sgt,
			u64 buffer_offset,
			u64 size,
			int pgsz_idx,
			u8 kind_v,
			u32 ctag_offset,
			u32 flags,
			int rw_flag,
			bool clear_ctags,
			bool sparse,
			bool priv,
			struct vm_gk20a_mapping_batch *batch,
			enum nvgpu_aperture aperture);
/* Record vGPU MM configuration (SMMU bypass, no big pages); returns 0. */
int vgpu_gp10b_init_mm_setup_hw(struct gk20a *g);

#endif
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c new file mode 100644 index 00000000..dd2ae306 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c | |||
@@ -0,0 +1,1214 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Graphics | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <uapi/linux/nvgpu.h> | ||
20 | |||
21 | #include <nvgpu/kmem.h> | ||
22 | #include <nvgpu/bug.h> | ||
23 | |||
24 | #include "vgpu.h" | ||
25 | #include "gr_vgpu.h" | ||
26 | #include "gk20a/dbg_gpu_gk20a.h" | ||
27 | |||
28 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | ||
29 | |||
30 | void vgpu_gr_detect_sm_arch(struct gk20a *g) | ||
31 | { | ||
32 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
33 | |||
34 | gk20a_dbg_fn(""); | ||
35 | |||
36 | g->params.sm_arch_sm_version = | ||
37 | priv->constants.sm_arch_sm_version; | ||
38 | g->params.sm_arch_spa_version = | ||
39 | priv->constants.sm_arch_spa_version; | ||
40 | g->params.sm_arch_warp_count = | ||
41 | priv->constants.sm_arch_warp_count; | ||
42 | } | ||
43 | |||
44 | int vgpu_gr_commit_inst(struct channel_gk20a *c, u64 gpu_va) | ||
45 | { | ||
46 | struct tegra_vgpu_cmd_msg msg; | ||
47 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | ||
48 | int err; | ||
49 | |||
50 | gk20a_dbg_fn(""); | ||
51 | |||
52 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_CTX; | ||
53 | msg.handle = vgpu_get_handle(c->g); | ||
54 | p->handle = c->virt_ctx; | ||
55 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
56 | |||
57 | return (err || msg.ret) ? -1 : 0; | ||
58 | } | ||
59 | |||
60 | static int vgpu_gr_commit_global_ctx_buffers(struct gk20a *g, | ||
61 | struct channel_gk20a *c, bool patch) | ||
62 | { | ||
63 | struct tegra_vgpu_cmd_msg msg; | ||
64 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | ||
65 | int err; | ||
66 | |||
67 | gk20a_dbg_fn(""); | ||
68 | |||
69 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_GLOBAL_CTX; | ||
70 | msg.handle = vgpu_get_handle(g); | ||
71 | p->handle = c->virt_ctx; | ||
72 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
73 | |||
74 | return (err || msg.ret) ? -1 : 0; | ||
75 | } | ||
76 | |||
/*
 * Load a saved fresh copy of the golden image into the channel's gr_ctx.
 * The image itself lives on the RM server side; this only issues the RPC.
 * Returns 0 on success, -1 on any RPC or server-side failure.
 */
static int vgpu_gr_load_golden_ctx_image(struct gk20a *g,
					struct channel_gk20a *c)
{
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
	int err;

	gk20a_dbg_fn("");

	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_LOAD_GR_GOLDEN_CTX;
	msg.handle = vgpu_get_handle(g);
	p->handle = c->virt_ctx;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

	return (err || msg.ret) ? -1 : 0;
}
94 | |||
95 | int vgpu_gr_init_ctx_state(struct gk20a *g) | ||
96 | { | ||
97 | struct gr_gk20a *gr = &g->gr; | ||
98 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
99 | |||
100 | gk20a_dbg_fn(""); | ||
101 | |||
102 | g->gr.ctx_vars.golden_image_size = priv->constants.golden_ctx_size; | ||
103 | g->gr.ctx_vars.zcull_ctxsw_image_size = priv->constants.zcull_ctx_size; | ||
104 | g->gr.ctx_vars.pm_ctxsw_image_size = priv->constants.hwpm_ctx_size; | ||
105 | if (!g->gr.ctx_vars.golden_image_size || | ||
106 | !g->gr.ctx_vars.zcull_ctxsw_image_size || | ||
107 | !g->gr.ctx_vars.pm_ctxsw_image_size) | ||
108 | return -ENXIO; | ||
109 | |||
110 | gr->ctx_vars.buffer_size = g->gr.ctx_vars.golden_image_size; | ||
111 | g->gr.ctx_vars.priv_access_map_size = 512 * 1024; | ||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | static int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g) | ||
116 | { | ||
117 | struct gr_gk20a *gr = &g->gr; | ||
118 | int attr_buffer_size; | ||
119 | |||
120 | u32 cb_buffer_size = gr->bundle_cb_default_size * | ||
121 | gr_scc_bundle_cb_size_div_256b_byte_granularity_v(); | ||
122 | |||
123 | u32 pagepool_buffer_size = g->ops.gr.pagepool_default_size(g) * | ||
124 | gr_scc_pagepool_total_pages_byte_granularity_v(); | ||
125 | |||
126 | gk20a_dbg_fn(""); | ||
127 | |||
128 | attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g); | ||
129 | |||
130 | gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size); | ||
131 | gr->global_ctx_buffer[CIRCULAR].mem.size = cb_buffer_size; | ||
132 | |||
133 | gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size); | ||
134 | gr->global_ctx_buffer[PAGEPOOL].mem.size = pagepool_buffer_size; | ||
135 | |||
136 | gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size); | ||
137 | gr->global_ctx_buffer[ATTRIBUTE].mem.size = attr_buffer_size; | ||
138 | |||
139 | gk20a_dbg_info("priv access map size : %d", | ||
140 | gr->ctx_vars.priv_access_map_size); | ||
141 | gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size = | ||
142 | gr->ctx_vars.priv_access_map_size; | ||
143 | |||
144 | return 0; | ||
145 | } | ||
146 | |||
/*
 * Allocate guest VA ranges for the four global context buffers and ask the
 * RM server to map them for channel @c in one RPC.
 *
 * On any failure every VA allocated so far is released and -ENOMEM is
 * returned; on success global_ctx_buffer_mapped is set so the teardown
 * path knows an unmap RPC is required.
 */
static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
					struct channel_gk20a *c)
{
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
	struct vm_gk20a *ch_vm = c->vm;
	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
	struct gr_gk20a *gr = &g->gr;
	u64 gpu_va;
	u32 i;
	int err;

	gk20a_dbg_fn("");

	/* FIXME: add VPR support */

	/* Circular Buffer */
	gpu_va = __nvgpu_vm_alloc_va(ch_vm,
			gr->global_ctx_buffer[CIRCULAR].mem.size,
			gmmu_page_size_kernel);

	if (!gpu_va)
		goto clean_up;
	g_bfr_va[CIRCULAR_VA] = gpu_va;
	g_bfr_size[CIRCULAR_VA] = gr->global_ctx_buffer[CIRCULAR].mem.size;

	/* Attribute Buffer */
	gpu_va = __nvgpu_vm_alloc_va(ch_vm,
			gr->global_ctx_buffer[ATTRIBUTE].mem.size,
			gmmu_page_size_kernel);

	if (!gpu_va)
		goto clean_up;
	g_bfr_va[ATTRIBUTE_VA] = gpu_va;
	g_bfr_size[ATTRIBUTE_VA] = gr->global_ctx_buffer[ATTRIBUTE].mem.size;

	/* Page Pool */
	gpu_va = __nvgpu_vm_alloc_va(ch_vm,
			gr->global_ctx_buffer[PAGEPOOL].mem.size,
			gmmu_page_size_kernel);
	if (!gpu_va)
		goto clean_up;
	g_bfr_va[PAGEPOOL_VA] = gpu_va;
	g_bfr_size[PAGEPOOL_VA] = gr->global_ctx_buffer[PAGEPOOL].mem.size;

	/* Priv register Access Map */
	gpu_va = __nvgpu_vm_alloc_va(ch_vm,
			gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size,
			gmmu_page_size_kernel);
	if (!gpu_va)
		goto clean_up;
	g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
	g_bfr_size[PRIV_ACCESS_MAP_VA] =
		gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size;

	/* One RPC carries all four VAs to the server. */
	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_MAP_GR_GLOBAL_CTX;
	msg.handle = vgpu_get_handle(g);
	p->handle = c->virt_ctx;
	p->cb_va = g_bfr_va[CIRCULAR_VA];
	p->attr_va = g_bfr_va[ATTRIBUTE_VA];
	p->page_pool_va = g_bfr_va[PAGEPOOL_VA];
	p->priv_access_map_va = g_bfr_va[PRIV_ACCESS_MAP_VA];
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	if (err || msg.ret)
		goto clean_up;

	c->ch_ctx.global_ctx_buffer_mapped = true;
	return 0;

 clean_up:
	/* Roll back: release whatever VAs were allocated before the error. */
	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
		if (g_bfr_va[i]) {
			__nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
					gmmu_page_size_kernel);
			g_bfr_va[i] = 0;
		}
	}
	return -ENOMEM;
}
227 | |||
228 | static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c) | ||
229 | { | ||
230 | struct vm_gk20a *ch_vm = c->vm; | ||
231 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | ||
232 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | ||
233 | u32 i; | ||
234 | |||
235 | gk20a_dbg_fn(""); | ||
236 | |||
237 | if (c->ch_ctx.global_ctx_buffer_mapped) { | ||
238 | struct tegra_vgpu_cmd_msg msg; | ||
239 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | ||
240 | int err; | ||
241 | |||
242 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX; | ||
243 | msg.handle = vgpu_get_handle(c->g); | ||
244 | p->handle = c->virt_ctx; | ||
245 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
246 | WARN_ON(err || msg.ret); | ||
247 | } | ||
248 | |||
249 | for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { | ||
250 | if (g_bfr_va[i]) { | ||
251 | __nvgpu_vm_free_va(ch_vm, g_bfr_va[i], | ||
252 | gmmu_page_size_kernel); | ||
253 | g_bfr_va[i] = 0; | ||
254 | g_bfr_size[i] = 0; | ||
255 | } | ||
256 | } | ||
257 | c->ch_ctx.global_ctx_buffer_mapped = false; | ||
258 | } | ||
259 | |||
/*
 * Allocate a GR context for @vm: reserve a guest VA for the context image
 * and ask the RM server to allocate the backing context object.
 *
 * On success *__gr_ctx receives the new descriptor (whose virt_ctx holds
 * the server-side handle).  Returns 0 on success or a negative errno.
 * Note: if the golden image size is not known yet (buffer_size == 0) this
 * returns 0 WITHOUT writing *__gr_ctx.
 */
int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
			struct gr_ctx_desc **__gr_ctx,
			struct vm_gk20a *vm,
			u32 class,
			u32 flags)
{
	struct tegra_vgpu_cmd_msg msg = {0};
	struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
	struct gr_gk20a *gr = &g->gr;
	struct gr_ctx_desc *gr_ctx;
	int err;

	gk20a_dbg_fn("");

	if (gr->ctx_vars.buffer_size == 0)
		return 0;

	/* alloc channel gr ctx buffer */
	gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
	gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;

	gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx));
	if (!gr_ctx)
		return -ENOMEM;

	gr_ctx->mem.size = gr->ctx_vars.buffer_total_size;
	gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm,
						gr_ctx->mem.size,
						gmmu_page_size_kernel);

	if (!gr_ctx->mem.gpu_va) {
		nvgpu_kfree(g, gr_ctx);
		return -ENOMEM;
	}

	msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC;
	msg.handle = vgpu_get_handle(g);
	p->as_handle = vm->handle;
	p->gr_ctx_va = gr_ctx->mem.gpu_va;
	p->class_num = class;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	err = err ? err : msg.ret;

	if (unlikely(err)) {
		/* Roll back the VA reservation and the descriptor. */
		nvgpu_err(g, "fail to alloc gr_ctx");
		__nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
				gmmu_page_size_kernel);
		nvgpu_kfree(g, gr_ctx);
	} else {
		gr_ctx->virt_ctx = p->gr_ctx_handle;
		*__gr_ctx = gr_ctx;
	}

	return err;
}
315 | |||
316 | void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | ||
317 | struct gr_ctx_desc *gr_ctx) | ||
318 | { | ||
319 | gk20a_dbg_fn(""); | ||
320 | |||
321 | if (gr_ctx && gr_ctx->mem.gpu_va) { | ||
322 | struct tegra_vgpu_cmd_msg msg; | ||
323 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; | ||
324 | int err; | ||
325 | |||
326 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE; | ||
327 | msg.handle = vgpu_get_handle(g); | ||
328 | p->gr_ctx_handle = gr_ctx->virt_ctx; | ||
329 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
330 | WARN_ON(err || msg.ret); | ||
331 | |||
332 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, | ||
333 | gmmu_page_size_kernel); | ||
334 | nvgpu_kfree(g, gr_ctx); | ||
335 | } | ||
336 | } | ||
337 | |||
/*
 * Free the per-channel GR context via the free_gr_ctx HAL hook and clear
 * the channel's pointer.  Only used for non-TSG channels (TSG channels
 * share the TSG-owned context).
 */
static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c)
{
	gk20a_dbg_fn("");

	c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx);
	c->ch_ctx.gr_ctx = NULL;
}
345 | |||
346 | static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, | ||
347 | struct channel_gk20a *c) | ||
348 | { | ||
349 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | ||
350 | struct vm_gk20a *ch_vm = c->vm; | ||
351 | struct tegra_vgpu_cmd_msg msg; | ||
352 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | ||
353 | int err; | ||
354 | |||
355 | gk20a_dbg_fn(""); | ||
356 | |||
357 | patch_ctx->mem.size = 128 * sizeof(u32); | ||
358 | patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm, | ||
359 | patch_ctx->mem.size, | ||
360 | gmmu_page_size_kernel); | ||
361 | if (!patch_ctx->mem.gpu_va) | ||
362 | return -ENOMEM; | ||
363 | |||
364 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_PATCH_CTX; | ||
365 | msg.handle = vgpu_get_handle(g); | ||
366 | p->handle = c->virt_ctx; | ||
367 | p->patch_ctx_va = patch_ctx->mem.gpu_va; | ||
368 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
369 | if (err || msg.ret) { | ||
370 | __nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va, | ||
371 | gmmu_page_size_kernel); | ||
372 | err = -ENOMEM; | ||
373 | } | ||
374 | |||
375 | return err; | ||
376 | } | ||
377 | |||
378 | static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c) | ||
379 | { | ||
380 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | ||
381 | struct vm_gk20a *ch_vm = c->vm; | ||
382 | |||
383 | gk20a_dbg_fn(""); | ||
384 | |||
385 | if (patch_ctx->mem.gpu_va) { | ||
386 | struct tegra_vgpu_cmd_msg msg; | ||
387 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | ||
388 | int err; | ||
389 | |||
390 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX; | ||
391 | msg.handle = vgpu_get_handle(c->g); | ||
392 | p->handle = c->virt_ctx; | ||
393 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
394 | WARN_ON(err || msg.ret); | ||
395 | |||
396 | __nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va, | ||
397 | gmmu_page_size_kernel); | ||
398 | patch_ctx->mem.gpu_va = 0; | ||
399 | } | ||
400 | } | ||
401 | |||
402 | static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c) | ||
403 | { | ||
404 | struct tegra_vgpu_cmd_msg msg; | ||
405 | struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx; | ||
406 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
407 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; | ||
408 | int err; | ||
409 | |||
410 | gk20a_dbg_fn(""); | ||
411 | |||
412 | /* check if hwpm was ever initialized. If not, nothing to do */ | ||
413 | if (pm_ctx->mem.gpu_va == 0) | ||
414 | return; | ||
415 | |||
416 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX; | ||
417 | msg.handle = vgpu_get_handle(c->g); | ||
418 | p->handle = c->virt_ctx; | ||
419 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
420 | WARN_ON(err || msg.ret); | ||
421 | |||
422 | __nvgpu_vm_free_va(c->vm, pm_ctx->mem.gpu_va, | ||
423 | gmmu_page_size_kernel); | ||
424 | pm_ctx->mem.gpu_va = 0; | ||
425 | } | ||
426 | |||
/*
 * Tear down all per-channel GR context state: ctx header (if the fifo HAL
 * provides a hook), global ctx mappings, patch ctx and HWPM ctx.  The
 * gr_ctx itself is freed only for bare channels — for TSG channels it is
 * owned by the TSG.  Finally the whole ch_ctx bookkeeping is cleared.
 */
void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg)
{
	gk20a_dbg_fn("");

	if (c->g->ops.fifo.free_channel_ctx_header)
		c->g->ops.fifo.free_channel_ctx_header(c);
	vgpu_gr_unmap_global_ctx_buffers(c);
	vgpu_gr_free_channel_patch_ctx(c);
	vgpu_gr_free_channel_pm_ctx(c);
	if (!is_tsg)
		vgpu_gr_free_channel_gr_ctx(c);

	/* zcull_ctx, pm_ctx */

	memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));

	/* Force the golden image to be reloaded on next obj-ctx alloc. */
	c->first_init = false;
}
445 | |||
446 | static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) | ||
447 | { | ||
448 | struct gr_ctx_desc *gr_ctx = c->ch_ctx.gr_ctx; | ||
449 | struct tegra_vgpu_cmd_msg msg = {0}; | ||
450 | struct tegra_vgpu_channel_bind_gr_ctx_params *p = | ||
451 | &msg.params.ch_bind_gr_ctx; | ||
452 | int err; | ||
453 | |||
454 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX; | ||
455 | msg.handle = vgpu_get_handle(c->g); | ||
456 | p->ch_handle = c->virt_ctx; | ||
457 | p->gr_ctx_handle = gr_ctx->virt_ctx; | ||
458 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
459 | err = err ? err : msg.ret; | ||
460 | WARN_ON(err); | ||
461 | |||
462 | return err; | ||
463 | } | ||
464 | |||
465 | static int vgpu_gr_tsg_bind_gr_ctx(struct tsg_gk20a *tsg) | ||
466 | { | ||
467 | struct gr_ctx_desc *gr_ctx = tsg->tsg_gr_ctx; | ||
468 | struct tegra_vgpu_cmd_msg msg = {0}; | ||
469 | struct tegra_vgpu_tsg_bind_gr_ctx_params *p = | ||
470 | &msg.params.tsg_bind_gr_ctx; | ||
471 | int err; | ||
472 | |||
473 | msg.cmd = TEGRA_VGPU_CMD_TSG_BIND_GR_CTX; | ||
474 | msg.handle = vgpu_get_handle(tsg->g); | ||
475 | p->tsg_id = tsg->tsgid; | ||
476 | p->gr_ctx_handle = gr_ctx->virt_ctx; | ||
477 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
478 | err = err ? err : msg.ret; | ||
479 | WARN_ON(err); | ||
480 | |||
481 | return err; | ||
482 | } | ||
483 | |||
/*
 * Allocate the object context for a channel: the GR context (channel- or
 * TSG-owned), its server-side bind, the patch buffer, the global ctx
 * mappings, and — on first use — the golden image load.
 *
 * Every step is idempotent across repeated calls (already-present state is
 * reused), so the error path deliberately does not unwind.
 * Returns 0 on success or a negative errno.
 */
int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
{
	struct gk20a *g = c->g;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	struct tsg_gk20a *tsg = NULL;
	int err = 0;

	gk20a_dbg_fn("");

	/* an address space needs to have been bound at this point.*/
	if (!gk20a_channel_as_bound(c)) {
		nvgpu_err(g, "not bound to address space at time"
			   " of grctx allocation");
		return -EINVAL;
	}

	if (!g->ops.gr.is_valid_class(g, class_num)) {
		nvgpu_err(g, "invalid obj class 0x%x", class_num);
		err = -EINVAL;
		goto out;
	}
	c->obj_class = class_num;

	if (gk20a_is_channel_marked_as_tsg(c))
		tsg = &f->tsg[c->tsgid];

	if (!tsg) {
		/* Bare channel: it owns its gr_ctx exclusively. */
		/* allocate gr ctx buffer */
		if (!ch_ctx->gr_ctx) {
			err = g->ops.gr.alloc_gr_ctx(g, &c->ch_ctx.gr_ctx,
						c->vm,
						class_num,
						flags);
			if (!err)
				err = vgpu_gr_ch_bind_gr_ctx(c);
			if (err) {
				nvgpu_err(g, "fail to allocate gr ctx buffer");
				goto out;
			}
		} else {
			/*TBD: needs to be more subtle about which is
			 * being allocated as some are allowed to be
			 * allocated along same channel */
			nvgpu_err(g,
				"too many classes alloc'd on same channel");
			err = -EINVAL;
			goto out;
		}
	} else {
		/* TSG channel: the gr_ctx is shared, allocated lazily by
		 * the first channel to get here, and the TSG takes a
		 * reference on the VM that owns it. */
		if (!tsg->tsg_gr_ctx) {
			tsg->vm = c->vm;
			nvgpu_vm_get(tsg->vm);
			err = g->ops.gr.alloc_gr_ctx(g, &tsg->tsg_gr_ctx,
						c->vm,
						class_num,
						flags);
			if (!err)
				err = vgpu_gr_tsg_bind_gr_ctx(tsg);
			if (err) {
				nvgpu_err(g,
					"fail to allocate TSG gr ctx buffer, err=%d", err);
				nvgpu_vm_put(tsg->vm);
				tsg->vm = NULL;
				goto out;
			}
		}

		ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
		err = vgpu_gr_ch_bind_gr_ctx(c);
		if (err) {
			nvgpu_err(g, "fail to bind gr ctx buffer");
			goto out;
		}
	}

	/* commit gr ctx buffer */
	err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va);
	if (err) {
		nvgpu_err(g, "fail to commit gr ctx buffer");
		goto out;
	}

	/* allocate patch buffer */
	if (ch_ctx->patch_ctx.mem.priv.pages == NULL) {
		err = vgpu_gr_alloc_channel_patch_ctx(g, c);
		if (err) {
			nvgpu_err(g, "fail to allocate patch buffer");
			goto out;
		}
	}

	/* map global buffer to channel gpu_va and commit */
	if (!ch_ctx->global_ctx_buffer_mapped) {
		err = vgpu_gr_map_global_ctx_buffers(g, c);
		if (err) {
			nvgpu_err(g, "fail to map global ctx buffer");
			goto out;
		}
		gr_gk20a_elpg_protected_call(g,
				vgpu_gr_commit_global_ctx_buffers(g, c, true));
	}

	/* load golden image */
	if (!c->first_init) {
		err = gr_gk20a_elpg_protected_call(g,
				vgpu_gr_load_golden_ctx_image(g, c));
		if (err) {
			nvgpu_err(g, "fail to load golden ctx image");
			goto out;
		}
		c->first_init = true;
	}

	gk20a_dbg_fn("done");
	return 0;
out:
	/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
	   can be reused so no need to release them.
	   2. golden image load is a one time thing so if
	   they pass, no need to undo. */
	nvgpu_err(g, "fail");
	return err;
}
608 | |||
609 | static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | ||
610 | { | ||
611 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
612 | u32 gpc_index; | ||
613 | int err = -ENOMEM; | ||
614 | |||
615 | gk20a_dbg_fn(""); | ||
616 | |||
617 | gr->max_gpc_count = priv->constants.max_gpc_count; | ||
618 | gr->gpc_count = priv->constants.gpc_count; | ||
619 | gr->max_tpc_per_gpc_count = priv->constants.max_tpc_per_gpc_count; | ||
620 | |||
621 | gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count; | ||
622 | |||
623 | gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32)); | ||
624 | if (!gr->gpc_tpc_count) | ||
625 | goto cleanup; | ||
626 | |||
627 | gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32)); | ||
628 | if (!gr->gpc_tpc_mask) | ||
629 | goto cleanup; | ||
630 | |||
631 | gr->sm_to_cluster = nvgpu_kzalloc(g, gr->gpc_count * | ||
632 | gr->max_tpc_per_gpc_count * | ||
633 | sizeof(struct sm_info)); | ||
634 | if (!gr->sm_to_cluster) | ||
635 | goto cleanup; | ||
636 | |||
637 | gr->tpc_count = 0; | ||
638 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
639 | gr->gpc_tpc_count[gpc_index] = | ||
640 | priv->constants.gpc_tpc_count[gpc_index]; | ||
641 | |||
642 | gr->tpc_count += gr->gpc_tpc_count[gpc_index]; | ||
643 | |||
644 | if (g->ops.gr.get_gpc_tpc_mask) | ||
645 | gr->gpc_tpc_mask[gpc_index] = | ||
646 | g->ops.gr.get_gpc_tpc_mask(g, gpc_index); | ||
647 | } | ||
648 | |||
649 | g->ops.gr.bundle_cb_defaults(g); | ||
650 | g->ops.gr.cb_size_default(g); | ||
651 | g->ops.gr.calc_global_ctx_buffer_size(g); | ||
652 | err = g->ops.gr.init_fs_state(g); | ||
653 | if (err) | ||
654 | goto cleanup; | ||
655 | return 0; | ||
656 | cleanup: | ||
657 | nvgpu_err(g, "out of memory"); | ||
658 | |||
659 | nvgpu_kfree(g, gr->gpc_tpc_count); | ||
660 | gr->gpc_tpc_count = NULL; | ||
661 | |||
662 | nvgpu_kfree(g, gr->gpc_tpc_mask); | ||
663 | gr->gpc_tpc_mask = NULL; | ||
664 | |||
665 | return err; | ||
666 | } | ||
667 | |||
668 | int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, | ||
669 | struct channel_gk20a *c, u64 zcull_va, | ||
670 | u32 mode) | ||
671 | { | ||
672 | struct tegra_vgpu_cmd_msg msg; | ||
673 | struct tegra_vgpu_zcull_bind_params *p = &msg.params.zcull_bind; | ||
674 | int err; | ||
675 | |||
676 | gk20a_dbg_fn(""); | ||
677 | |||
678 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_ZCULL; | ||
679 | msg.handle = vgpu_get_handle(g); | ||
680 | p->handle = c->virt_ctx; | ||
681 | p->zcull_va = zcull_va; | ||
682 | p->mode = mode; | ||
683 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
684 | |||
685 | return (err || msg.ret) ? -ENOMEM : 0; | ||
686 | } | ||
687 | |||
688 | int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, | ||
689 | struct gr_zcull_info *zcull_params) | ||
690 | { | ||
691 | struct tegra_vgpu_cmd_msg msg; | ||
692 | struct tegra_vgpu_zcull_info_params *p = &msg.params.zcull_info; | ||
693 | int err; | ||
694 | |||
695 | gk20a_dbg_fn(""); | ||
696 | |||
697 | msg.cmd = TEGRA_VGPU_CMD_GET_ZCULL_INFO; | ||
698 | msg.handle = vgpu_get_handle(g); | ||
699 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
700 | if (err || msg.ret) | ||
701 | return -ENOMEM; | ||
702 | |||
703 | zcull_params->width_align_pixels = p->width_align_pixels; | ||
704 | zcull_params->height_align_pixels = p->height_align_pixels; | ||
705 | zcull_params->pixel_squares_by_aliquots = p->pixel_squares_by_aliquots; | ||
706 | zcull_params->aliquot_total = p->aliquot_total; | ||
707 | zcull_params->region_byte_multiplier = p->region_byte_multiplier; | ||
708 | zcull_params->region_header_size = p->region_header_size; | ||
709 | zcull_params->subregion_header_size = p->subregion_header_size; | ||
710 | zcull_params->subregion_width_align_pixels = | ||
711 | p->subregion_width_align_pixels; | ||
712 | zcull_params->subregion_height_align_pixels = | ||
713 | p->subregion_height_align_pixels; | ||
714 | zcull_params->subregion_count = p->subregion_count; | ||
715 | |||
716 | return 0; | ||
717 | } | ||
718 | |||
719 | u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | ||
720 | { | ||
721 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
722 | |||
723 | return priv->constants.gpc_tpc_mask[gpc_index]; | ||
724 | } | ||
725 | |||
726 | u32 vgpu_gr_get_max_fbps_count(struct gk20a *g) | ||
727 | { | ||
728 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
729 | |||
730 | gk20a_dbg_fn(""); | ||
731 | |||
732 | return priv->constants.num_fbps; | ||
733 | } | ||
734 | |||
735 | u32 vgpu_gr_get_fbp_en_mask(struct gk20a *g) | ||
736 | { | ||
737 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
738 | |||
739 | gk20a_dbg_fn(""); | ||
740 | |||
741 | return priv->constants.fbp_en_mask; | ||
742 | } | ||
743 | |||
744 | u32 vgpu_gr_get_max_ltc_per_fbp(struct gk20a *g) | ||
745 | { | ||
746 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
747 | |||
748 | gk20a_dbg_fn(""); | ||
749 | |||
750 | return priv->constants.ltc_per_fbp; | ||
751 | } | ||
752 | |||
753 | u32 vgpu_gr_get_max_lts_per_ltc(struct gk20a *g) | ||
754 | { | ||
755 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
756 | |||
757 | gk20a_dbg_fn(""); | ||
758 | |||
759 | return priv->constants.max_lts_per_ltc; | ||
760 | } | ||
761 | |||
762 | u32 *vgpu_gr_rop_l2_en_mask(struct gk20a *g) | ||
763 | { | ||
764 | /* no one use it yet */ | ||
765 | return NULL; | ||
766 | } | ||
767 | |||
768 | int vgpu_gr_add_zbc(struct gk20a *g, struct gr_gk20a *gr, | ||
769 | struct zbc_entry *zbc_val) | ||
770 | { | ||
771 | struct tegra_vgpu_cmd_msg msg = {0}; | ||
772 | struct tegra_vgpu_zbc_set_table_params *p = &msg.params.zbc_set_table; | ||
773 | int err; | ||
774 | |||
775 | gk20a_dbg_fn(""); | ||
776 | |||
777 | msg.cmd = TEGRA_VGPU_CMD_ZBC_SET_TABLE; | ||
778 | msg.handle = vgpu_get_handle(g); | ||
779 | |||
780 | p->type = zbc_val->type; | ||
781 | p->format = zbc_val->format; | ||
782 | switch (p->type) { | ||
783 | case GK20A_ZBC_TYPE_COLOR: | ||
784 | memcpy(p->color_ds, zbc_val->color_ds, sizeof(p->color_ds)); | ||
785 | memcpy(p->color_l2, zbc_val->color_l2, sizeof(p->color_l2)); | ||
786 | break; | ||
787 | case GK20A_ZBC_TYPE_DEPTH: | ||
788 | p->depth = zbc_val->depth; | ||
789 | break; | ||
790 | default: | ||
791 | return -EINVAL; | ||
792 | } | ||
793 | |||
794 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
795 | |||
796 | return (err || msg.ret) ? -ENOMEM : 0; | ||
797 | } | ||
798 | |||
799 | int vgpu_gr_query_zbc(struct gk20a *g, struct gr_gk20a *gr, | ||
800 | struct zbc_query_params *query_params) | ||
801 | { | ||
802 | struct tegra_vgpu_cmd_msg msg = {0}; | ||
803 | struct tegra_vgpu_zbc_query_table_params *p = | ||
804 | &msg.params.zbc_query_table; | ||
805 | int err; | ||
806 | |||
807 | gk20a_dbg_fn(""); | ||
808 | |||
809 | msg.cmd = TEGRA_VGPU_CMD_ZBC_QUERY_TABLE; | ||
810 | msg.handle = vgpu_get_handle(g); | ||
811 | |||
812 | p->type = query_params->type; | ||
813 | p->index_size = query_params->index_size; | ||
814 | |||
815 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
816 | if (err || msg.ret) | ||
817 | return -ENOMEM; | ||
818 | |||
819 | switch (query_params->type) { | ||
820 | case GK20A_ZBC_TYPE_COLOR: | ||
821 | memcpy(query_params->color_ds, p->color_ds, | ||
822 | sizeof(query_params->color_ds)); | ||
823 | memcpy(query_params->color_l2, p->color_l2, | ||
824 | sizeof(query_params->color_l2)); | ||
825 | break; | ||
826 | case GK20A_ZBC_TYPE_DEPTH: | ||
827 | query_params->depth = p->depth; | ||
828 | break; | ||
829 | case GK20A_ZBC_TYPE_INVALID: | ||
830 | query_params->index_size = p->index_size; | ||
831 | break; | ||
832 | default: | ||
833 | return -EINVAL; | ||
834 | } | ||
835 | query_params->ref_cnt = p->ref_cnt; | ||
836 | query_params->format = p->format; | ||
837 | |||
838 | return 0; | ||
839 | } | ||
840 | |||
841 | static void vgpu_remove_gr_support(struct gr_gk20a *gr) | ||
842 | { | ||
843 | gk20a_dbg_fn(""); | ||
844 | |||
845 | gk20a_comptag_allocator_destroy(gr->g, &gr->comp_tags); | ||
846 | |||
847 | nvgpu_kfree(gr->g, gr->sm_error_states); | ||
848 | gr->sm_error_states = NULL; | ||
849 | |||
850 | nvgpu_kfree(gr->g, gr->gpc_tpc_mask); | ||
851 | gr->gpc_tpc_mask = NULL; | ||
852 | |||
853 | nvgpu_kfree(gr->g, gr->sm_to_cluster); | ||
854 | gr->sm_to_cluster = NULL; | ||
855 | |||
856 | nvgpu_kfree(gr->g, gr->gpc_tpc_count); | ||
857 | gr->gpc_tpc_count = NULL; | ||
858 | } | ||
859 | |||
/*
 * One-time software setup of the GR unit for the vGPU case: fetch the
 * topology from the server, pull ctx state, set up comptags and global
 * context buffers, and allocate per-SM error bookkeeping.
 *
 * Returns 0 on success (including the already-initialized no-op case),
 * or a negative error code; on failure everything allocated so far is
 * torn down via vgpu_remove_gr_support().
 */
static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	int err;

	gk20a_dbg_fn("");

	/* Idempotent: repeat calls after a successful init are no-ops. */
	if (gr->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	gr->g = g;

#if defined(CONFIG_GK20A_CYCLE_STATS)
	nvgpu_mutex_init(&g->gr.cs_lock);
#endif

	err = vgpu_gr_init_gr_config(g, gr);
	if (err)
		goto clean_up;

	err = g->ops.gr.init_ctx_state(g);
	if (err)
		goto clean_up;

	err = g->ops.ltc.init_comptags(g, gr);
	if (err)
		goto clean_up;

	err = vgpu_gr_alloc_global_ctx_buffers(g);
	if (err)
		goto clean_up;

	nvgpu_mutex_init(&gr->ctx_mutex);

	/* Per-SM error state, sized by gr->no_of_sm — presumably set by
	 * the init steps above; TODO confirm against init_gr_config. */
	gr->sm_error_states = nvgpu_kzalloc(g,
			sizeof(struct nvgpu_gr_sm_error_state) *
			gr->no_of_sm);
	if (!gr->sm_error_states) {
		err = -ENOMEM;
		goto clean_up;
	}

	gr->remove_support = vgpu_remove_gr_support;
	gr->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;

clean_up:
	nvgpu_err(g, "fail");
	/* Tolerates partial initialization. */
	vgpu_remove_gr_support(gr);
	return err;
}
915 | |||
/*
 * Public entry point for GR init on vGPU; all of the work lives in the
 * software-setup helper (there is no hardware setup on the client side).
 */
int vgpu_init_gr_support(struct gk20a *g)
{
	gk20a_dbg_fn("");
	return vgpu_gr_init_gr_setup_sw(g);
}
922 | |||
/*
 * Dispatch a GR interrupt forwarded by the vGPU server to the affected
 * channel: wake waiters for notify/semaphore interrupts, post an error
 * notifier for the various error interrupts, and post debugger events
 * for SM exceptions. Returns 0 always (including when the channel is
 * already gone).
 */
int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
{
	struct fifo_gk20a *f = &g->fifo;
	/* Takes a reference on the channel; may return NULL if the
	 * channel is being freed. */
	struct channel_gk20a *ch = gk20a_channel_get(&f->channel[info->chid]);

	gk20a_dbg_fn("");
	if (!ch)
		return 0;

	/* Notify/semaphore interrupts are routine; only log the rest. */
	if (info->type != TEGRA_VGPU_GR_INTR_NOTIFY &&
		info->type != TEGRA_VGPU_GR_INTR_SEMAPHORE)
		nvgpu_err(g, "gr intr (%d) on ch %u", info->type, info->chid);

	switch (info->type) {
	case TEGRA_VGPU_GR_INTR_NOTIFY:
		nvgpu_cond_broadcast_interruptible(&ch->notifier_wq);
		break;
	case TEGRA_VGPU_GR_INTR_SEMAPHORE:
		nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
		break;
	case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT:
		gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT);
		break;
	case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY:
		gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
	/* fall through -- NOTE(review): no break here; ILLEGAL_NOTIFY
	 * drops into ILLEGAL_METHOD (a no-op). Looks intentional since
	 * ILLEGAL_METHOD has no notifier, but worth confirming. */
	case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD:
		break;
	case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS:
		gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	case TEGRA_VGPU_GR_INTR_FECS_ERROR:
		break;
	case TEGRA_VGPU_GR_INTR_CLASS_ERROR:
		gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD:
		gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	case TEGRA_VGPU_GR_INTR_EXCEPTION:
		gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	case TEGRA_VGPU_GR_INTR_SM_EXCEPTION:
		/* Let attached debugger sessions know an SM faulted. */
		gk20a_dbg_gpu_post_events(ch);
		break;
	default:
		WARN_ON(1);
		break;
	}

	/* Drop the reference taken by gk20a_channel_get(). */
	gk20a_channel_put(ch);
	return 0;
}
981 | |||
982 | int vgpu_gr_nonstall_isr(struct gk20a *g, | ||
983 | struct tegra_vgpu_gr_nonstall_intr_info *info) | ||
984 | { | ||
985 | gk20a_dbg_fn(""); | ||
986 | |||
987 | switch (info->type) { | ||
988 | case TEGRA_VGPU_GR_NONSTALL_INTR_SEMAPHORE: | ||
989 | gk20a_channel_semaphore_wakeup(g, true); | ||
990 | break; | ||
991 | default: | ||
992 | WARN_ON(1); | ||
993 | break; | ||
994 | } | ||
995 | |||
996 | return 0; | ||
997 | } | ||
998 | |||
999 | int vgpu_gr_set_sm_debug_mode(struct gk20a *g, | ||
1000 | struct channel_gk20a *ch, u64 sms, bool enable) | ||
1001 | { | ||
1002 | struct tegra_vgpu_cmd_msg msg; | ||
1003 | struct tegra_vgpu_sm_debug_mode *p = &msg.params.sm_debug_mode; | ||
1004 | int err; | ||
1005 | |||
1006 | gk20a_dbg_fn(""); | ||
1007 | |||
1008 | msg.cmd = TEGRA_VGPU_CMD_SET_SM_DEBUG_MODE; | ||
1009 | msg.handle = vgpu_get_handle(g); | ||
1010 | p->handle = ch->virt_ctx; | ||
1011 | p->sms = sms; | ||
1012 | p->enable = (u32)enable; | ||
1013 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
1014 | WARN_ON(err || msg.ret); | ||
1015 | |||
1016 | return err ? err : msg.ret; | ||
1017 | } | ||
1018 | |||
1019 | int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g, | ||
1020 | struct channel_gk20a *ch, bool enable) | ||
1021 | { | ||
1022 | struct tegra_vgpu_cmd_msg msg; | ||
1023 | struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode; | ||
1024 | int err; | ||
1025 | |||
1026 | gk20a_dbg_fn(""); | ||
1027 | |||
1028 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_SMPC_CTXSW_MODE; | ||
1029 | msg.handle = vgpu_get_handle(g); | ||
1030 | p->handle = ch->virt_ctx; | ||
1031 | |||
1032 | if (enable) | ||
1033 | p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; | ||
1034 | else | ||
1035 | p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; | ||
1036 | |||
1037 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
1038 | WARN_ON(err || msg.ret); | ||
1039 | |||
1040 | return err ? err : msg.ret; | ||
1041 | } | ||
1042 | |||
/*
 * Turn HWPM (performance-monitor) context-switching on or off for
 * channel @ch. Enabling lazily allocates the PM context buffer VA on
 * first use; the buffer persists for the life of the channel context.
 * Returns the transport error, the server's status, or -ENOMEM if the
 * lazy VA allocation fails.
 */
int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
	struct channel_gk20a *ch, bool enable)
{
	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode;
	int err;

	gk20a_dbg_fn("");

	/* NOTE: p->mode is written through the params pointer before the
	 * rest of msg is filled in below; p aliases msg.params, so the
	 * value survives until the send. Keep this ordering in mind when
	 * touching this function. */
	if (enable) {
		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;

		/* Allocate buffer if necessary -- only VA space is
		 * reserved client-side; the server does the backing. */
		if (pm_ctx->mem.gpu_va == 0) {
			pm_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch->vm,
					g->gr.ctx_vars.pm_ctxsw_image_size,
					gmmu_page_size_kernel);

			if (!pm_ctx->mem.gpu_va)
				return -ENOMEM;
			pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size;
		}
	} else
		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;

	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
	msg.handle = vgpu_get_handle(g);
	p->handle = ch->virt_ctx;
	p->gpu_va = pm_ctx->mem.gpu_va;

	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	WARN_ON(err || msg.ret);

	return err ? err : msg.ret;
}
1080 | |||
1081 | int vgpu_gr_clear_sm_error_state(struct gk20a *g, | ||
1082 | struct channel_gk20a *ch, u32 sm_id) | ||
1083 | { | ||
1084 | struct gr_gk20a *gr = &g->gr; | ||
1085 | struct tegra_vgpu_cmd_msg msg; | ||
1086 | struct tegra_vgpu_clear_sm_error_state *p = | ||
1087 | &msg.params.clear_sm_error_state; | ||
1088 | int err; | ||
1089 | |||
1090 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1091 | msg.cmd = TEGRA_VGPU_CMD_CLEAR_SM_ERROR_STATE; | ||
1092 | msg.handle = vgpu_get_handle(g); | ||
1093 | p->handle = ch->virt_ctx; | ||
1094 | p->sm_id = sm_id; | ||
1095 | |||
1096 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
1097 | WARN_ON(err || msg.ret); | ||
1098 | |||
1099 | memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states)); | ||
1100 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1101 | |||
1102 | return err ? err : msg.ret; | ||
1103 | |||
1104 | |||
1105 | return 0; | ||
1106 | } | ||
1107 | |||
/*
 * Common worker for suspending/resuming all contexts attached to a
 * debug session. The channel IDs are passed to the server through the
 * out-of-band (OOB) buffer of the command queue, since their count is
 * unbounded; the command itself (@cmd) selects suspend vs resume.
 *
 * On success, *ctx_resident_ch_fd is set to the fd of the channel that
 * was context-resident at the time (or -1 if none / not in this
 * session). Lock order: dbg_sessions_lock, then ch_list_lock.
 */
static int vgpu_gr_suspend_resume_contexts(struct gk20a *g,
		struct dbg_session_gk20a *dbg_s,
		int *ctx_resident_ch_fd, u32 cmd)
{
	struct dbg_session_channel_data *ch_data;
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_suspend_resume_contexts *p;
	size_t n;
	int channel_fd = -1;
	int err = 0;
	void *handle = NULL;
	u16 *oob;
	size_t oob_size;

	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
	nvgpu_mutex_acquire(&dbg_s->ch_list_lock);

	/* Borrow the OOB area of the command queue; must be returned via
	 * tegra_gr_comm_oob_put_ptr() before the locks are dropped. */
	handle = tegra_gr_comm_oob_get_ptr(TEGRA_GR_COMM_CTX_CLIENT,
			tegra_gr_comm_get_server_vmid(), TEGRA_VGPU_QUEUE_CMD,
			(void **)&oob, &oob_size);
	if (!handle) {
		err = -EINVAL;
		goto done;
	}

	/* First pass: count channels to validate OOB capacity. */
	n = 0;
	list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry)
		n++;

	if (oob_size < n * sizeof(u16)) {
		err = -ENOMEM;
		goto done;
	}

	msg.cmd = cmd;
	msg.handle = vgpu_get_handle(g);
	p = &msg.params.suspend_contexts;
	p->num_channels = n;
	/* Second pass: write the chids into the OOB buffer. */
	n = 0;
	list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry)
		oob[n++] = (u16)ch_data->chid;

	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	if (err || msg.ret) {
		err = -ENOMEM;
		goto done;
	}

	/* Server reports the resident chid, or all-ones for "none";
	 * translate it back to this session's fd if it is ours. */
	if (p->resident_chid != (u16)~0) {
		list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry) {
			if (ch_data->chid == p->resident_chid) {
				channel_fd = ch_data->channel_fd;
				break;
			}
		}
	}

done:
	if (handle)
		tegra_gr_comm_oob_put_ptr(handle);
	nvgpu_mutex_release(&dbg_s->ch_list_lock);
	nvgpu_mutex_release(&g->dbg_sessions_lock);
	*ctx_resident_ch_fd = channel_fd;
	return err;
}
1173 | |||
1174 | int vgpu_gr_suspend_contexts(struct gk20a *g, | ||
1175 | struct dbg_session_gk20a *dbg_s, | ||
1176 | int *ctx_resident_ch_fd) | ||
1177 | { | ||
1178 | return vgpu_gr_suspend_resume_contexts(g, dbg_s, | ||
1179 | ctx_resident_ch_fd, TEGRA_VGPU_CMD_SUSPEND_CONTEXTS); | ||
1180 | } | ||
1181 | |||
1182 | int vgpu_gr_resume_contexts(struct gk20a *g, | ||
1183 | struct dbg_session_gk20a *dbg_s, | ||
1184 | int *ctx_resident_ch_fd) | ||
1185 | { | ||
1186 | return vgpu_gr_suspend_resume_contexts(g, dbg_s, | ||
1187 | ctx_resident_ch_fd, TEGRA_VGPU_CMD_RESUME_CONTEXTS); | ||
1188 | } | ||
1189 | |||
1190 | void vgpu_gr_handle_sm_esr_event(struct gk20a *g, | ||
1191 | struct tegra_vgpu_sm_esr_info *info) | ||
1192 | { | ||
1193 | struct nvgpu_gr_sm_error_state *sm_error_states; | ||
1194 | |||
1195 | if (info->sm_id >= g->gr.no_of_sm) { | ||
1196 | nvgpu_err(g, "invalid smd_id %d / %d", | ||
1197 | info->sm_id, g->gr.no_of_sm); | ||
1198 | return; | ||
1199 | } | ||
1200 | |||
1201 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1202 | |||
1203 | sm_error_states = &g->gr.sm_error_states[info->sm_id]; | ||
1204 | |||
1205 | sm_error_states->hww_global_esr = info->hww_global_esr; | ||
1206 | sm_error_states->hww_warp_esr = info->hww_warp_esr; | ||
1207 | sm_error_states->hww_warp_esr_pc = info->hww_warp_esr_pc; | ||
1208 | sm_error_states->hww_global_esr_report_mask = | ||
1209 | info->hww_global_esr_report_mask; | ||
1210 | sm_error_states->hww_warp_esr_report_mask = | ||
1211 | info->hww_warp_esr_report_mask; | ||
1212 | |||
1213 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1214 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h new file mode 100644 index 00000000..7815201e --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
#ifndef _GR_VGPU_H_
#define _GR_VGPU_H_

#include <nvgpu/types.h>

/* Forward declarations only; callers include the full definitions. */
struct gk20a;
struct channel_gk20a;
struct gr_gk20a;
struct gr_zcull_info;
struct zbc_entry;
struct zbc_query_params;
struct dbg_session_gk20a;

/* GR HAL entry points for the virtualized (vGPU) case. Most of these
 * are thin RPC wrappers that forward the request to the vGPU server. */
void vgpu_gr_detect_sm_arch(struct gk20a *g);
void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg);
int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags);
int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
			struct channel_gk20a *c, u64 zcull_va,
			u32 mode);
int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
			struct gr_zcull_info *zcull_params);
/* Topology/constant getters: served from locally cached server constants. */
u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
u32 vgpu_gr_get_max_fbps_count(struct gk20a *g);
u32 vgpu_gr_get_fbp_en_mask(struct gk20a *g);
u32 vgpu_gr_get_max_ltc_per_fbp(struct gk20a *g);
u32 vgpu_gr_get_max_lts_per_ltc(struct gk20a *g);
u32 *vgpu_gr_rop_l2_en_mask(struct gk20a *g);
int vgpu_gr_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
			struct zbc_entry *zbc_val);
int vgpu_gr_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
			struct zbc_query_params *query_params);
int vgpu_gr_set_sm_debug_mode(struct gk20a *g,
	struct channel_gk20a *ch, u64 sms, bool enable);
int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
	struct channel_gk20a *ch, bool enable);
int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
	struct channel_gk20a *ch, bool enable);
int vgpu_gr_clear_sm_error_state(struct gk20a *g,
		struct channel_gk20a *ch, u32 sm_id);
int vgpu_gr_suspend_contexts(struct gk20a *g,
		struct dbg_session_gk20a *dbg_s,
		int *ctx_resident_ch_fd);
int vgpu_gr_resume_contexts(struct gk20a *g,
		struct dbg_session_gk20a *dbg_s,
		int *ctx_resident_ch_fd);
int vgpu_gr_commit_inst(struct channel_gk20a *c, u64 gpu_va);

#endif
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c new file mode 100644 index 00000000..3b9d63e8 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c | |||
@@ -0,0 +1,99 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include "gk20a/gk20a.h" | ||
18 | #include "common/linux/vgpu/clk_vgpu.h" | ||
19 | #include "common/linux/platform_gk20a.h" | ||
20 | #include "common/linux/os_linux.h" | ||
21 | |||
22 | #include <nvgpu/nvhost.h> | ||
23 | #include <nvgpu/nvhost_t19x.h> | ||
24 | |||
25 | #include <linux/platform_device.h> | ||
26 | |||
27 | static int gv11b_vgpu_probe(struct device *dev) | ||
28 | { | ||
29 | struct platform_device *pdev = to_platform_device(dev); | ||
30 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
31 | struct resource *r; | ||
32 | void __iomem *regs; | ||
33 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(platform->g); | ||
34 | struct gk20a *g = platform->g; | ||
35 | int ret; | ||
36 | |||
37 | r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "usermode"); | ||
38 | if (!r) { | ||
39 | dev_err(dev, "failed to get usermode regs\n"); | ||
40 | return -ENXIO; | ||
41 | } | ||
42 | regs = devm_ioremap_resource(dev, r); | ||
43 | if (IS_ERR(regs)) { | ||
44 | dev_err(dev, "failed to map usermode regs\n"); | ||
45 | return PTR_ERR(regs); | ||
46 | } | ||
47 | l->t19x.usermode_regs = regs; | ||
48 | |||
49 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
50 | ret = nvgpu_get_nvhost_dev(g); | ||
51 | if (ret) { | ||
52 | l->t19x.usermode_regs = NULL; | ||
53 | return ret; | ||
54 | } | ||
55 | |||
56 | ret = nvgpu_nvhost_syncpt_unit_interface_get_aperture(g->nvhost_dev, | ||
57 | &g->syncpt_unit_base, | ||
58 | &g->syncpt_unit_size); | ||
59 | if (ret) { | ||
60 | dev_err(dev, "Failed to get syncpt interface"); | ||
61 | return -ENOSYS; | ||
62 | } | ||
63 | g->syncpt_size = nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); | ||
64 | nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n", | ||
65 | g->syncpt_unit_base, g->syncpt_unit_size, g->syncpt_size); | ||
66 | #endif | ||
67 | vgpu_init_clk_support(platform->g); | ||
68 | |||
69 | return 0; | ||
70 | } | ||
71 | |||
/*
 * Platform descriptor for gv11b running as a virtual (vGPU) device.
 * All power-gating/clock-gating features are disabled: the server owns
 * the physical GPU's power management, so the client must not gate.
 */
struct gk20a_platform gv11b_vgpu_tegra_platform = {
	.has_syncpoints = true,
	.aggressive_sync_destroy_thresh = 64,

	/* power management configuration -- all gating off on vGPU */
	.can_railgate_init = false,
	.can_elpg_init = false,
	.enable_slcg = false,
	.enable_blcg = false,
	.enable_elcg = false,
	.enable_elpg = false,
	.enable_aelpg = false,
	.can_slcg = false,
	.can_blcg = false,
	.can_elcg = false,

	/* Channel watchdog timeout in milliseconds. */
	.ch_wdt_timeout_ms = 5000,

	.probe = gv11b_vgpu_probe,

	/* Clock queries are RPCs to the vGPU server. */
	.clk_round_rate = vgpu_clk_round_rate,
	.get_clk_freqs = vgpu_clk_get_freqs,

	/* frequency scaling configuration */
	.devfreq_governor = "userspace",

	.virtual_dev = true,
};
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_fifo_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_fifo_gv11b.c new file mode 100644 index 00000000..710e4b90 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_fifo_gv11b.c | |||
@@ -0,0 +1,111 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <gk20a/gk20a.h> | ||
18 | |||
19 | #include "common/linux/vgpu/vgpu.h" | ||
20 | #include "gv11b/fifo_gv11b.h" | ||
21 | #include <nvgpu/nvhost_t19x.h> | ||
22 | |||
23 | #include <linux/tegra_vgpu.h> | ||
24 | |||
#ifdef CONFIG_TEGRA_GK20A_NVHOST
/*
 * Map the syncpoint shim into channel @c's address space:
 *  - a read-only mapping of the whole shim, created once per VM and
 *    shared by all channels on that VM (vm->syncpt_ro_map_gpu_va);
 *  - a read-write mapping of this channel's own syncpoint page,
 *    returned in @syncpt_buf.
 * Only VA space is reserved client-side; the actual mapping is done by
 * the vGPU server via TEGRA_VGPU_CMD_MAP_SYNCPT.
 */
int vgpu_gv11b_fifo_alloc_syncpt_buf(struct channel_gk20a *c,
			u32 syncpt_id, struct nvgpu_mem *syncpt_buf)
{
	int err;
	struct gk20a *g = c->g;
	struct vm_gk20a *vm = c->vm;
	struct tegra_vgpu_cmd_msg msg = {};
	struct tegra_vgpu_map_syncpt_params *p = &msg.params.t19x.map_syncpt;

	/*
	 * Add ro map for complete sync point shim range in vm.
	 * All channels sharing same vm will share same ro mapping.
	 * Create rw map for current channel sync point.
	 */
	if (!vm->syncpt_ro_map_gpu_va) {
		vm->syncpt_ro_map_gpu_va = __nvgpu_vm_alloc_va(vm,
				g->syncpt_unit_size,
				gmmu_page_size_kernel);
		if (!vm->syncpt_ro_map_gpu_va) {
			nvgpu_err(g, "allocating read-only va space failed");
			return -ENOMEM;
		}

		msg.cmd = TEGRA_VGPU_CMD_MAP_SYNCPT;
		msg.handle = vgpu_get_handle(g);
		p->as_handle = c->vm->handle;
		p->gpu_va = vm->syncpt_ro_map_gpu_va;
		p->len = g->syncpt_unit_size;
		p->offset = 0;
		p->prot = TEGRA_VGPU_MAP_PROT_READ_ONLY;
		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
		err = err ? err : msg.ret;
		if (err) {
			nvgpu_err(g,
				"mapping read-only va space failed err %d",
				err);
			/* Roll back so the next caller retries the
			 * shared RO mapping from scratch. */
			__nvgpu_vm_free_va(c->vm, vm->syncpt_ro_map_gpu_va,
					gmmu_page_size_kernel);
			vm->syncpt_ro_map_gpu_va = 0;
			return err;
		}
	}

	/* Per-channel RW window onto this channel's syncpoint page. */
	syncpt_buf->gpu_va = __nvgpu_vm_alloc_va(c->vm, g->syncpt_size,
			gmmu_page_size_kernel);
	if (!syncpt_buf->gpu_va) {
		nvgpu_err(g, "allocating syncpt va space failed");
		return -ENOMEM;
	}

	/* msg is reused: cmd/handle and all params are rewritten here. */
	msg.cmd = TEGRA_VGPU_CMD_MAP_SYNCPT;
	msg.handle = vgpu_get_handle(g);
	p->as_handle = c->vm->handle;
	p->gpu_va = syncpt_buf->gpu_va;
	p->len = g->syncpt_size;
	p->offset =
		nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id);
	p->prot = TEGRA_VGPU_MAP_PROT_NONE;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	err = err ? err : msg.ret;
	if (err) {
		nvgpu_err(g, "mapping syncpt va space failed err %d", err);
		__nvgpu_vm_free_va(c->vm, syncpt_buf->gpu_va,
				gmmu_page_size_kernel);
		return err;
	}

	return 0;
}
#endif /* CONFIG_TEGRA_GK20A_NVHOST */
96 | |||
97 | int vgpu_gv11b_init_fifo_setup_hw(struct gk20a *g) | ||
98 | { | ||
99 | struct fifo_gk20a *f = &g->fifo; | ||
100 | int err; | ||
101 | |||
102 | err = vgpu_get_attribute(vgpu_get_handle(g), | ||
103 | TEGRA_VGPU_ATTRIB_MAX_SUBCTX_COUNT, | ||
104 | &f->t19x.max_subctx_count); | ||
105 | if (err) { | ||
106 | nvgpu_err(g, "get max_subctx_count failed %d", err); | ||
107 | return err; | ||
108 | } | ||
109 | |||
110 | return 0; | ||
111 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_fifo_gv11b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_fifo_gv11b.h new file mode 100644 index 00000000..c2e75680 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_fifo_gv11b.h | |||
@@ -0,0 +1,25 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef _VGPU_FIFO_GV11B_H_ | ||
18 | #define _VGPU_FIFO_GV11B_H_ | ||
19 | |||
20 | struct gk20a; | ||
21 | |||
22 | int vgpu_gv11b_init_fifo_setup_hw(struct gk20a *g); | ||
23 | int vgpu_gv11b_fifo_alloc_syncpt_buf(struct channel_gk20a *c, | ||
24 | u32 syncpt_id, struct nvgpu_mem *syncpt_buf); | ||
25 | #endif | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_gr_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_gr_gv11b.c new file mode 100644 index 00000000..69e5b2ce --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_gr_gv11b.c | |||
@@ -0,0 +1,34 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include "gk20a/gk20a.h" | ||
18 | #include "common/linux/vgpu/gr_vgpu.h" | ||
19 | #include "vgpu_subctx_gv11b.h" | ||
20 | |||
21 | int vgpu_gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) | ||
22 | { | ||
23 | int err; | ||
24 | |||
25 | err = vgpu_gv11b_alloc_subctx_header(c); | ||
26 | if (err) | ||
27 | return err; | ||
28 | |||
29 | err = vgpu_gr_commit_inst(c, gpu_va); | ||
30 | if (err) | ||
31 | vgpu_gv11b_free_subctx_header(c); | ||
32 | |||
33 | return err; | ||
34 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_gr_gv11b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_gr_gv11b.h new file mode 100644 index 00000000..0208012d --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_gr_gv11b.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef _VGPU_GR_GV11B_H_ | ||
18 | #define _VGPU_GR_GV11B_H_ | ||
19 | |||
20 | struct channel_gk20a; | ||
21 | |||
22 | int vgpu_gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va); | ||
23 | |||
24 | #endif | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_gv11b.c new file mode 100644 index 00000000..9ba1892b --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_gv11b.c | |||
@@ -0,0 +1,40 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include "gk20a/gk20a.h" | ||
18 | |||
19 | #include <nvgpu/enabled.h> | ||
20 | #include <nvgpu/enabled_t19x.h> | ||
21 | |||
22 | #include "common/linux/vgpu/vgpu.h" | ||
23 | #include "vgpu_gv11b.h" | ||
24 | |||
25 | int vgpu_gv11b_init_gpu_characteristics(struct gk20a *g) | ||
26 | { | ||
27 | int err; | ||
28 | |||
29 | gk20a_dbg_fn(""); | ||
30 | |||
31 | err = vgpu_init_gpu_characteristics(g); | ||
32 | if (err) { | ||
33 | nvgpu_err(g, "vgpu_init_gpu_characteristics failed, err %d\n", err); | ||
34 | return err; | ||
35 | } | ||
36 | |||
37 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS, true); | ||
38 | |||
39 | return 0; | ||
40 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_gv11b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_gv11b.h new file mode 100644 index 00000000..84ebfa17 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_gv11b.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef _VGPU_GV11B_H_ | ||
18 | #define _VGPU_GV11B_H_ | ||
19 | |||
20 | struct gk20a; | ||
21 | |||
22 | int vgpu_gv11b_init_gpu_characteristics(struct gk20a *g); | ||
23 | |||
24 | #endif | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c new file mode 100644 index 00000000..6b5a1b0d --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c | |||
@@ -0,0 +1,637 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <gk20a/gk20a.h> | ||
18 | #include <gv11b/hal_gv11b.h> | ||
19 | |||
20 | #include "common/linux/vgpu/vgpu.h" | ||
21 | #include "common/linux/vgpu/fifo_vgpu.h" | ||
22 | #include "common/linux/vgpu/gr_vgpu.h" | ||
23 | #include "common/linux/vgpu/ltc_vgpu.h" | ||
24 | #include "common/linux/vgpu/mm_vgpu.h" | ||
25 | #include "common/linux/vgpu/dbg_vgpu.h" | ||
26 | #include "common/linux/vgpu/fecs_trace_vgpu.h" | ||
27 | #include "common/linux/vgpu/css_vgpu.h" | ||
28 | #include "common/linux/vgpu/vgpu_t19x.h" | ||
29 | #include "common/linux/vgpu/gm20b/vgpu_gr_gm20b.h" | ||
30 | #include "common/linux/vgpu/gp10b/vgpu_mm_gp10b.h" | ||
31 | #include "common/linux/vgpu/gp10b/vgpu_gr_gp10b.h" | ||
32 | |||
33 | #include <gk20a/fb_gk20a.h> | ||
34 | #include <gk20a/flcn_gk20a.h> | ||
35 | #include <gk20a/bus_gk20a.h> | ||
36 | #include <gk20a/mc_gk20a.h> | ||
37 | |||
38 | #include <gm20b/gr_gm20b.h> | ||
39 | #include <gm20b/fb_gm20b.h> | ||
40 | #include <gm20b/fifo_gm20b.h> | ||
41 | #include <gm20b/pmu_gm20b.h> | ||
42 | #include <gm20b/mm_gm20b.h> | ||
43 | #include <gm20b/acr_gm20b.h> | ||
44 | #include <gm20b/ltc_gm20b.h> | ||
45 | |||
46 | #include <gp10b/fb_gp10b.h> | ||
47 | #include <gp10b/pmu_gp10b.h> | ||
48 | #include <gp10b/mm_gp10b.h> | ||
49 | #include <gp10b/mc_gp10b.h> | ||
50 | #include <gp10b/ce_gp10b.h> | ||
51 | #include <gp10b/fifo_gp10b.h> | ||
52 | #include <gp10b/therm_gp10b.h> | ||
53 | #include <gp10b/priv_ring_gp10b.h> | ||
54 | #include <gp10b/ltc_gp10b.h> | ||
55 | |||
56 | #include <gp106/pmu_gp106.h> | ||
57 | #include <gp106/acr_gp106.h> | ||
58 | |||
59 | #include <gv11b/fb_gv11b.h> | ||
60 | #include <gv11b/pmu_gv11b.h> | ||
61 | #include <gv11b/acr_gv11b.h> | ||
62 | #include <gv11b/mm_gv11b.h> | ||
63 | #include <gv11b/mc_gv11b.h> | ||
64 | #include <gv11b/ce_gv11b.h> | ||
65 | #include <gv11b/fifo_gv11b.h> | ||
66 | #include <gv11b/therm_gv11b.h> | ||
67 | #include <gv11b/regops_gv11b.h> | ||
68 | #include <gv11b/gr_ctx_gv11b.h> | ||
69 | #include <gv11b/ltc_gv11b.h> | ||
70 | #include <gv11b/gv11b_gating_reglist.h> | ||
71 | |||
72 | #include <gv100/gr_gv100.h> | ||
73 | |||
74 | #include <nvgpu/enabled.h> | ||
75 | |||
76 | #include "vgpu_gv11b.h" | ||
77 | #include "vgpu_gr_gv11b.h" | ||
78 | #include "vgpu_fifo_gv11b.h" | ||
79 | #include "vgpu_subctx_gv11b.h" | ||
80 | #include "vgpu_tsg_gv11b.h" | ||
81 | |||
82 | #include <nvgpu/hw/gv11b/hw_fuse_gv11b.h> | ||
83 | #include <nvgpu/hw/gv11b/hw_fifo_gv11b.h> | ||
84 | #include <nvgpu/hw/gv11b/hw_ram_gv11b.h> | ||
85 | #include <nvgpu/hw/gv11b/hw_top_gv11b.h> | ||
86 | #include <nvgpu/hw/gv11b/hw_pwr_gv11b.h> | ||
87 | |||
88 | static const struct gpu_ops vgpu_gv11b_ops = { | ||
89 | .ltc = { | ||
90 | .determine_L2_size_bytes = vgpu_determine_L2_size_bytes, | ||
91 | .set_zbc_s_entry = gv11b_ltc_set_zbc_stencil_entry, | ||
92 | .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, | ||
93 | .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, | ||
94 | .init_cbc = NULL, | ||
95 | .init_fs_state = vgpu_ltc_init_fs_state, | ||
96 | .init_comptags = vgpu_ltc_init_comptags, | ||
97 | .cbc_ctrl = NULL, | ||
98 | .isr = gv11b_ltc_isr, | ||
99 | .cbc_fix_config = gv11b_ltc_cbc_fix_config, | ||
100 | .flush = gm20b_flush_ltc, | ||
101 | .set_enabled = gp10b_ltc_set_enabled, | ||
102 | }, | ||
103 | .ce2 = { | ||
104 | .isr_stall = gv11b_ce_isr, | ||
105 | .isr_nonstall = gp10b_ce_nonstall_isr, | ||
106 | .get_num_pce = vgpu_ce_get_num_pce, | ||
107 | }, | ||
108 | .gr = { | ||
109 | .init_gpc_mmu = gr_gv11b_init_gpc_mmu, | ||
110 | .bundle_cb_defaults = gr_gv11b_bundle_cb_defaults, | ||
111 | .cb_size_default = gr_gv11b_cb_size_default, | ||
112 | .calc_global_ctx_buffer_size = | ||
113 | gr_gv11b_calc_global_ctx_buffer_size, | ||
114 | .commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb, | ||
115 | .commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb, | ||
116 | .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, | ||
117 | .commit_global_pagepool = gr_gp10b_commit_global_pagepool, | ||
118 | .handle_sw_method = gr_gv11b_handle_sw_method, | ||
119 | .set_alpha_circular_buffer_size = | ||
120 | gr_gv11b_set_alpha_circular_buffer_size, | ||
121 | .set_circular_buffer_size = gr_gv11b_set_circular_buffer_size, | ||
122 | .enable_hww_exceptions = gr_gv11b_enable_hww_exceptions, | ||
123 | .is_valid_class = gr_gv11b_is_valid_class, | ||
124 | .is_valid_gfx_class = gr_gv11b_is_valid_gfx_class, | ||
125 | .is_valid_compute_class = gr_gv11b_is_valid_compute_class, | ||
126 | .get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs, | ||
127 | .get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs, | ||
128 | .init_fs_state = vgpu_gm20b_init_fs_state, | ||
129 | .set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask, | ||
130 | .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, | ||
131 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, | ||
132 | .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, | ||
133 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, | ||
134 | .free_channel_ctx = vgpu_gr_free_channel_ctx, | ||
135 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, | ||
136 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, | ||
137 | .get_zcull_info = vgpu_gr_get_zcull_info, | ||
138 | .is_tpc_addr = gr_gm20b_is_tpc_addr, | ||
139 | .get_tpc_num = gr_gm20b_get_tpc_num, | ||
140 | .detect_sm_arch = vgpu_gr_detect_sm_arch, | ||
141 | .add_zbc_color = gr_gp10b_add_zbc_color, | ||
142 | .add_zbc_depth = gr_gp10b_add_zbc_depth, | ||
143 | .zbc_set_table = vgpu_gr_add_zbc, | ||
144 | .zbc_query_table = vgpu_gr_query_zbc, | ||
145 | .pmu_save_zbc = gk20a_pmu_save_zbc, | ||
146 | .add_zbc = gr_gk20a_add_zbc, | ||
147 | .pagepool_default_size = gr_gv11b_pagepool_default_size, | ||
148 | .init_ctx_state = vgpu_gr_gp10b_init_ctx_state, | ||
149 | .alloc_gr_ctx = vgpu_gr_gp10b_alloc_gr_ctx, | ||
150 | .free_gr_ctx = vgpu_gr_gp10b_free_gr_ctx, | ||
151 | .update_ctxsw_preemption_mode = | ||
152 | gr_gp10b_update_ctxsw_preemption_mode, | ||
153 | .dump_gr_regs = NULL, | ||
154 | .update_pc_sampling = gr_gm20b_update_pc_sampling, | ||
155 | .get_fbp_en_mask = vgpu_gr_get_fbp_en_mask, | ||
156 | .get_max_ltc_per_fbp = vgpu_gr_get_max_ltc_per_fbp, | ||
157 | .get_max_lts_per_ltc = vgpu_gr_get_max_lts_per_ltc, | ||
158 | .get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask, | ||
159 | .get_max_fbps_count = vgpu_gr_get_max_fbps_count, | ||
160 | .init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info, | ||
161 | .wait_empty = gr_gv11b_wait_empty, | ||
162 | .init_cyclestats = vgpu_gr_gm20b_init_cyclestats, | ||
163 | .set_sm_debug_mode = vgpu_gr_set_sm_debug_mode, | ||
164 | .enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs, | ||
165 | .bpt_reg_info = gv11b_gr_bpt_reg_info, | ||
166 | .get_access_map = gr_gv11b_get_access_map, | ||
167 | .handle_fecs_error = gr_gv11b_handle_fecs_error, | ||
168 | .handle_sm_exception = gr_gk20a_handle_sm_exception, | ||
169 | .handle_tex_exception = gr_gv11b_handle_tex_exception, | ||
170 | .enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions, | ||
171 | .enable_exceptions = gr_gv11b_enable_exceptions, | ||
172 | .get_lrf_tex_ltc_dram_override = get_ecc_override_val, | ||
173 | .update_smpc_ctxsw_mode = vgpu_gr_update_smpc_ctxsw_mode, | ||
174 | .update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode, | ||
175 | .record_sm_error_state = gv11b_gr_record_sm_error_state, | ||
176 | .update_sm_error_state = gv11b_gr_update_sm_error_state, | ||
177 | .clear_sm_error_state = vgpu_gr_clear_sm_error_state, | ||
178 | .suspend_contexts = vgpu_gr_suspend_contexts, | ||
179 | .resume_contexts = vgpu_gr_resume_contexts, | ||
180 | .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, | ||
181 | .init_sm_id_table = gr_gv100_init_sm_id_table, | ||
182 | .load_smid_config = gr_gv11b_load_smid_config, | ||
183 | .program_sm_id_numbering = gr_gv11b_program_sm_id_numbering, | ||
184 | .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr, | ||
185 | .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr, | ||
186 | .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr, | ||
187 | .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr, | ||
188 | .setup_rop_mapping = gr_gv11b_setup_rop_mapping, | ||
189 | .program_zcull_mapping = gr_gv11b_program_zcull_mapping, | ||
190 | .commit_global_timeslice = gr_gv11b_commit_global_timeslice, | ||
191 | .commit_inst = vgpu_gr_gv11b_commit_inst, | ||
192 | .write_zcull_ptr = gr_gv11b_write_zcull_ptr, | ||
193 | .write_pm_ptr = gr_gv11b_write_pm_ptr, | ||
194 | .init_elcg_mode = gr_gv11b_init_elcg_mode, | ||
195 | .load_tpc_mask = gr_gv11b_load_tpc_mask, | ||
196 | .inval_icache = gr_gk20a_inval_icache, | ||
197 | .trigger_suspend = gv11b_gr_sm_trigger_suspend, | ||
198 | .wait_for_pause = gr_gk20a_wait_for_pause, | ||
199 | .resume_from_pause = gv11b_gr_resume_from_pause, | ||
200 | .clear_sm_errors = gr_gk20a_clear_sm_errors, | ||
201 | .tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions, | ||
202 | .get_esr_sm_sel = gv11b_gr_get_esr_sm_sel, | ||
203 | .sm_debugger_attached = gv11b_gr_sm_debugger_attached, | ||
204 | .suspend_single_sm = gv11b_gr_suspend_single_sm, | ||
205 | .suspend_all_sms = gv11b_gr_suspend_all_sms, | ||
206 | .resume_single_sm = gv11b_gr_resume_single_sm, | ||
207 | .resume_all_sms = gv11b_gr_resume_all_sms, | ||
208 | .get_sm_hww_warp_esr = gv11b_gr_get_sm_hww_warp_esr, | ||
209 | .get_sm_hww_global_esr = gv11b_gr_get_sm_hww_global_esr, | ||
210 | .get_sm_no_lock_down_hww_global_esr_mask = | ||
211 | gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask, | ||
212 | .lock_down_sm = gv11b_gr_lock_down_sm, | ||
213 | .wait_for_sm_lock_down = gv11b_gr_wait_for_sm_lock_down, | ||
214 | .clear_sm_hww = gv11b_gr_clear_sm_hww, | ||
215 | .init_ovr_sm_dsm_perf = gv11b_gr_init_ovr_sm_dsm_perf, | ||
216 | .get_ovr_perf_regs = gv11b_gr_get_ovr_perf_regs, | ||
217 | .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, | ||
218 | .set_boosted_ctx = NULL, | ||
219 | .set_preemption_mode = vgpu_gr_gp10b_set_preemption_mode, | ||
220 | .set_czf_bypass = NULL, | ||
221 | .pre_process_sm_exception = gr_gv11b_pre_process_sm_exception, | ||
222 | .set_preemption_buffer_va = gr_gv11b_set_preemption_buffer_va, | ||
223 | .init_preemption_state = NULL, | ||
224 | .update_boosted_ctx = NULL, | ||
225 | .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, | ||
226 | .create_gr_sysfs = gr_gv11b_create_sysfs, | ||
227 | .set_ctxsw_preemption_mode = vgpu_gr_gp10b_set_ctxsw_preemption_mode, | ||
228 | .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, | ||
229 | .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table, | ||
230 | .handle_tpc_mpc_exception = gr_gv11b_handle_tpc_mpc_exception, | ||
231 | .zbc_s_query_table = gr_gv11b_zbc_s_query_table, | ||
232 | .load_zbc_s_default_tbl = gr_gv11b_load_stencil_default_tbl, | ||
233 | .handle_gpc_gpcmmu_exception = | ||
234 | gr_gv11b_handle_gpc_gpcmmu_exception, | ||
235 | .add_zbc_type_s = gr_gv11b_add_zbc_type_s, | ||
236 | .get_egpc_base = gv11b_gr_get_egpc_base, | ||
237 | .get_egpc_etpc_num = gv11b_gr_get_egpc_etpc_num, | ||
238 | .handle_gpc_gpccs_exception = | ||
239 | gr_gv11b_handle_gpc_gpccs_exception, | ||
240 | .load_zbc_s_tbl = gr_gv11b_load_stencil_tbl, | ||
241 | .access_smpc_reg = gv11b_gr_access_smpc_reg, | ||
242 | .is_egpc_addr = gv11b_gr_pri_is_egpc_addr, | ||
243 | .add_zbc_s = gr_gv11b_add_zbc_stencil, | ||
244 | .handle_gcc_exception = gr_gv11b_handle_gcc_exception, | ||
245 | .init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle, | ||
246 | .handle_tpc_sm_ecc_exception = | ||
247 | gr_gv11b_handle_tpc_sm_ecc_exception, | ||
248 | .decode_egpc_addr = gv11b_gr_decode_egpc_addr, | ||
249 | .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data, | ||
250 | }, | ||
251 | .fb = { | ||
252 | .reset = gv11b_fb_reset, | ||
253 | .init_hw = gk20a_fb_init_hw, | ||
254 | .init_fs_state = gv11b_fb_init_fs_state, | ||
255 | .init_cbc = gv11b_fb_init_cbc, | ||
256 | .set_mmu_page_size = gm20b_fb_set_mmu_page_size, | ||
257 | .set_use_full_comp_tag_line = | ||
258 | gm20b_fb_set_use_full_comp_tag_line, | ||
259 | .compression_page_size = gp10b_fb_compression_page_size, | ||
260 | .compressible_page_size = gp10b_fb_compressible_page_size, | ||
261 | .vpr_info_fetch = gm20b_fb_vpr_info_fetch, | ||
262 | .dump_vpr_wpr_info = gm20b_fb_dump_vpr_wpr_info, | ||
263 | .read_wpr_info = gm20b_fb_read_wpr_info, | ||
264 | .is_debug_mode_enabled = NULL, | ||
265 | .set_debug_mode = vgpu_mm_mmu_set_debug_mode, | ||
266 | .tlb_invalidate = vgpu_mm_tlb_invalidate, | ||
267 | .hub_isr = gv11b_fb_hub_isr, | ||
268 | }, | ||
269 | .clock_gating = { | ||
270 | .slcg_bus_load_gating_prod = | ||
271 | gv11b_slcg_bus_load_gating_prod, | ||
272 | .slcg_ce2_load_gating_prod = | ||
273 | gv11b_slcg_ce2_load_gating_prod, | ||
274 | .slcg_chiplet_load_gating_prod = | ||
275 | gv11b_slcg_chiplet_load_gating_prod, | ||
276 | .slcg_ctxsw_firmware_load_gating_prod = | ||
277 | gv11b_slcg_ctxsw_firmware_load_gating_prod, | ||
278 | .slcg_fb_load_gating_prod = | ||
279 | gv11b_slcg_fb_load_gating_prod, | ||
280 | .slcg_fifo_load_gating_prod = | ||
281 | gv11b_slcg_fifo_load_gating_prod, | ||
282 | .slcg_gr_load_gating_prod = | ||
283 | gr_gv11b_slcg_gr_load_gating_prod, | ||
284 | .slcg_ltc_load_gating_prod = | ||
285 | ltc_gv11b_slcg_ltc_load_gating_prod, | ||
286 | .slcg_perf_load_gating_prod = | ||
287 | gv11b_slcg_perf_load_gating_prod, | ||
288 | .slcg_priring_load_gating_prod = | ||
289 | gv11b_slcg_priring_load_gating_prod, | ||
290 | .slcg_pmu_load_gating_prod = | ||
291 | gv11b_slcg_pmu_load_gating_prod, | ||
292 | .slcg_therm_load_gating_prod = | ||
293 | gv11b_slcg_therm_load_gating_prod, | ||
294 | .slcg_xbar_load_gating_prod = | ||
295 | gv11b_slcg_xbar_load_gating_prod, | ||
296 | .blcg_bus_load_gating_prod = | ||
297 | gv11b_blcg_bus_load_gating_prod, | ||
298 | .blcg_ce_load_gating_prod = | ||
299 | gv11b_blcg_ce_load_gating_prod, | ||
300 | .blcg_ctxsw_firmware_load_gating_prod = | ||
301 | gv11b_blcg_ctxsw_firmware_load_gating_prod, | ||
302 | .blcg_fb_load_gating_prod = | ||
303 | gv11b_blcg_fb_load_gating_prod, | ||
304 | .blcg_fifo_load_gating_prod = | ||
305 | gv11b_blcg_fifo_load_gating_prod, | ||
306 | .blcg_gr_load_gating_prod = | ||
307 | gv11b_blcg_gr_load_gating_prod, | ||
308 | .blcg_ltc_load_gating_prod = | ||
309 | gv11b_blcg_ltc_load_gating_prod, | ||
310 | .blcg_pwr_csb_load_gating_prod = | ||
311 | gv11b_blcg_pwr_csb_load_gating_prod, | ||
312 | .blcg_pmu_load_gating_prod = | ||
313 | gv11b_blcg_pmu_load_gating_prod, | ||
314 | .blcg_xbar_load_gating_prod = | ||
315 | gv11b_blcg_xbar_load_gating_prod, | ||
316 | .pg_gr_load_gating_prod = | ||
317 | gr_gv11b_pg_gr_load_gating_prod, | ||
318 | }, | ||
319 | .fifo = { | ||
320 | .init_fifo_setup_hw = vgpu_gv11b_init_fifo_setup_hw, | ||
321 | .bind_channel = vgpu_channel_bind, | ||
322 | .unbind_channel = vgpu_channel_unbind, | ||
323 | .disable_channel = vgpu_channel_disable, | ||
324 | .enable_channel = vgpu_channel_enable, | ||
325 | .alloc_inst = vgpu_channel_alloc_inst, | ||
326 | .free_inst = vgpu_channel_free_inst, | ||
327 | .setup_ramfc = vgpu_channel_setup_ramfc, | ||
328 | .channel_set_timeslice = vgpu_channel_set_timeslice, | ||
329 | .default_timeslice_us = vgpu_fifo_default_timeslice_us, | ||
330 | .setup_userd = gk20a_fifo_setup_userd, | ||
331 | .userd_gp_get = gv11b_userd_gp_get, | ||
332 | .userd_gp_put = gv11b_userd_gp_put, | ||
333 | .userd_pb_get = gv11b_userd_pb_get, | ||
334 | .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val, | ||
335 | .preempt_channel = vgpu_fifo_preempt_channel, | ||
336 | .preempt_tsg = vgpu_fifo_preempt_tsg, | ||
337 | .enable_tsg = vgpu_enable_tsg, | ||
338 | .disable_tsg = gk20a_disable_tsg, | ||
339 | .tsg_verify_channel_status = NULL, | ||
340 | .tsg_verify_status_ctx_reload = NULL, | ||
341 | /* TODO: implement it for CE fault */ | ||
342 | .tsg_verify_status_faulted = NULL, | ||
343 | .update_runlist = vgpu_fifo_update_runlist, | ||
344 | .trigger_mmu_fault = NULL, | ||
345 | .get_mmu_fault_info = NULL, | ||
346 | .wait_engine_idle = vgpu_fifo_wait_engine_idle, | ||
347 | .get_num_fifos = gv11b_fifo_get_num_fifos, | ||
348 | .get_pbdma_signature = gp10b_fifo_get_pbdma_signature, | ||
349 | .set_runlist_interleave = vgpu_fifo_set_runlist_interleave, | ||
350 | .tsg_set_timeslice = vgpu_tsg_set_timeslice, | ||
351 | .tsg_open = vgpu_tsg_open, | ||
352 | .force_reset_ch = vgpu_fifo_force_reset_ch, | ||
353 | .engine_enum_from_type = gp10b_fifo_engine_enum_from_type, | ||
354 | .device_info_data_parse = gp10b_device_info_data_parse, | ||
355 | .eng_runlist_base_size = fifo_eng_runlist_base__size_1_v, | ||
356 | .init_engine_info = vgpu_fifo_init_engine_info, | ||
357 | .runlist_entry_size = ram_rl_entry_size_v, | ||
358 | .get_tsg_runlist_entry = gv11b_get_tsg_runlist_entry, | ||
359 | .get_ch_runlist_entry = gv11b_get_ch_runlist_entry, | ||
360 | .is_fault_engine_subid_gpc = gv11b_is_fault_engine_subid_gpc, | ||
361 | .dump_pbdma_status = gk20a_dump_pbdma_status, | ||
362 | .dump_eng_status = gv11b_dump_eng_status, | ||
363 | .dump_channel_status_ramfc = gv11b_dump_channel_status_ramfc, | ||
364 | .intr_0_error_mask = gv11b_fifo_intr_0_error_mask, | ||
365 | .is_preempt_pending = gv11b_fifo_is_preempt_pending, | ||
366 | .init_pbdma_intr_descs = gv11b_fifo_init_pbdma_intr_descs, | ||
367 | .reset_enable_hw = gv11b_init_fifo_reset_enable_hw, | ||
368 | .teardown_ch_tsg = gv11b_fifo_teardown_ch_tsg, | ||
369 | .handle_sched_error = gv11b_fifo_handle_sched_error, | ||
370 | .handle_pbdma_intr_0 = gv11b_fifo_handle_pbdma_intr_0, | ||
371 | .handle_pbdma_intr_1 = gv11b_fifo_handle_pbdma_intr_1, | ||
372 | .init_eng_method_buffers = gv11b_fifo_init_eng_method_buffers, | ||
373 | .deinit_eng_method_buffers = | ||
374 | gv11b_fifo_deinit_eng_method_buffers, | ||
375 | .tsg_bind_channel = vgpu_gv11b_tsg_bind_channel, | ||
376 | .tsg_unbind_channel = vgpu_tsg_unbind_channel, | ||
377 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
378 | .alloc_syncpt_buf = vgpu_gv11b_fifo_alloc_syncpt_buf, | ||
379 | .free_syncpt_buf = gv11b_fifo_free_syncpt_buf, | ||
380 | .add_syncpt_wait_cmd = gv11b_fifo_add_syncpt_wait_cmd, | ||
381 | .get_syncpt_wait_cmd_size = gv11b_fifo_get_syncpt_wait_cmd_size, | ||
382 | .add_syncpt_incr_cmd = gv11b_fifo_add_syncpt_incr_cmd, | ||
383 | .get_syncpt_incr_cmd_size = gv11b_fifo_get_syncpt_incr_cmd_size, | ||
384 | #endif | ||
385 | .resetup_ramfc = NULL, | ||
386 | .reschedule_runlist = NULL, | ||
387 | .device_info_fault_id = top_device_info_data_fault_id_enum_v, | ||
388 | .free_channel_ctx_header = vgpu_gv11b_free_subctx_header, | ||
389 | .preempt_ch_tsg = gv11b_fifo_preempt_ch_tsg, | ||
390 | .handle_ctxsw_timeout = gv11b_fifo_handle_ctxsw_timeout, | ||
391 | }, | ||
392 | .gr_ctx = { | ||
393 | .get_netlist_name = gr_gv11b_get_netlist_name, | ||
394 | .is_fw_defined = gr_gv11b_is_firmware_defined, | ||
395 | }, | ||
396 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
397 | .fecs_trace = { | ||
398 | .alloc_user_buffer = NULL, | ||
399 | .free_user_buffer = NULL, | ||
400 | .mmap_user_buffer = NULL, | ||
401 | .init = NULL, | ||
402 | .deinit = NULL, | ||
403 | .enable = NULL, | ||
404 | .disable = NULL, | ||
405 | .is_enabled = NULL, | ||
406 | .reset = NULL, | ||
407 | .flush = NULL, | ||
408 | .poll = NULL, | ||
409 | .bind_channel = NULL, | ||
410 | .unbind_channel = NULL, | ||
411 | .max_entries = NULL, | ||
412 | }, | ||
413 | #endif /* CONFIG_GK20A_CTXSW_TRACE */ | ||
414 | .mm = { | ||
415 | /* FIXME: add support for sparse mappings */ | ||
416 | .support_sparse = NULL, | ||
417 | .gmmu_map = vgpu_gp10b_locked_gmmu_map, | ||
418 | .gmmu_unmap = vgpu_locked_gmmu_unmap, | ||
419 | .vm_bind_channel = vgpu_vm_bind_channel, | ||
420 | .fb_flush = vgpu_mm_fb_flush, | ||
421 | .l2_invalidate = vgpu_mm_l2_invalidate, | ||
422 | .l2_flush = vgpu_mm_l2_flush, | ||
423 | .cbc_clean = gk20a_mm_cbc_clean, | ||
424 | .set_big_page_size = gm20b_mm_set_big_page_size, | ||
425 | .get_big_page_sizes = gm20b_mm_get_big_page_sizes, | ||
426 | .get_default_big_page_size = gp10b_mm_get_default_big_page_size, | ||
427 | .gpu_phys_addr = gm20b_gpu_phys_addr, | ||
428 | .get_iommu_bit = gk20a_mm_get_iommu_bit, | ||
429 | .get_mmu_levels = gp10b_mm_get_mmu_levels, | ||
430 | .init_pdb = gp10b_mm_init_pdb, | ||
431 | .init_mm_setup_hw = vgpu_gp10b_init_mm_setup_hw, | ||
432 | .is_bar1_supported = gv11b_mm_is_bar1_supported, | ||
433 | .init_inst_block = gv11b_init_inst_block, | ||
434 | .mmu_fault_pending = gv11b_mm_mmu_fault_pending, | ||
435 | .get_kind_invalid = gm20b_get_kind_invalid, | ||
436 | .get_kind_pitch = gm20b_get_kind_pitch, | ||
437 | .init_bar2_vm = gb10b_init_bar2_vm, | ||
438 | .init_bar2_mm_hw_setup = gv11b_init_bar2_mm_hw_setup, | ||
439 | .remove_bar2_vm = gv11b_mm_remove_bar2_vm, | ||
440 | .fault_info_mem_destroy = gv11b_mm_fault_info_mem_destroy, | ||
441 | }, | ||
442 | .therm = { | ||
443 | .init_therm_setup_hw = gp10b_init_therm_setup_hw, | ||
444 | .elcg_init_idle_filters = gv11b_elcg_init_idle_filters, | ||
445 | }, | ||
446 | .pmu = { | ||
447 | .pmu_setup_elpg = gp10b_pmu_setup_elpg, | ||
448 | .pmu_get_queue_head = pwr_pmu_queue_head_r, | ||
449 | .pmu_get_queue_head_size = pwr_pmu_queue_head__size_1_v, | ||
450 | .pmu_get_queue_tail = pwr_pmu_queue_tail_r, | ||
451 | .pmu_get_queue_tail_size = pwr_pmu_queue_tail__size_1_v, | ||
452 | .pmu_queue_head = gk20a_pmu_queue_head, | ||
453 | .pmu_queue_tail = gk20a_pmu_queue_tail, | ||
454 | .pmu_msgq_tail = gk20a_pmu_msgq_tail, | ||
455 | .pmu_mutex_size = pwr_pmu_mutex__size_1_v, | ||
456 | .pmu_mutex_acquire = gk20a_pmu_mutex_acquire, | ||
457 | .pmu_mutex_release = gk20a_pmu_mutex_release, | ||
458 | .write_dmatrfbase = gp10b_write_dmatrfbase, | ||
459 | .pmu_elpg_statistics = gp106_pmu_elpg_statistics, | ||
460 | .pmu_pg_init_param = gv11b_pg_gr_init, | ||
461 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, | ||
462 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, | ||
463 | .dump_secure_fuses = pmu_dump_security_fuses_gp10b, | ||
464 | .reset_engine = gp106_pmu_engine_reset, | ||
465 | .is_engine_in_reset = gp106_pmu_is_engine_in_reset, | ||
466 | .pmu_nsbootstrap = gv11b_pmu_bootstrap, | ||
467 | .pmu_pg_set_sub_feature_mask = gv11b_pg_set_subfeature_mask, | ||
468 | .is_pmu_supported = gv11b_is_pmu_supported, | ||
469 | }, | ||
470 | .regops = { | ||
471 | .get_global_whitelist_ranges = | ||
472 | gv11b_get_global_whitelist_ranges, | ||
473 | .get_global_whitelist_ranges_count = | ||
474 | gv11b_get_global_whitelist_ranges_count, | ||
475 | .get_context_whitelist_ranges = | ||
476 | gv11b_get_context_whitelist_ranges, | ||
477 | .get_context_whitelist_ranges_count = | ||
478 | gv11b_get_context_whitelist_ranges_count, | ||
479 | .get_runcontrol_whitelist = gv11b_get_runcontrol_whitelist, | ||
480 | .get_runcontrol_whitelist_count = | ||
481 | gv11b_get_runcontrol_whitelist_count, | ||
482 | .get_runcontrol_whitelist_ranges = | ||
483 | gv11b_get_runcontrol_whitelist_ranges, | ||
484 | .get_runcontrol_whitelist_ranges_count = | ||
485 | gv11b_get_runcontrol_whitelist_ranges_count, | ||
486 | .get_qctl_whitelist = gv11b_get_qctl_whitelist, | ||
487 | .get_qctl_whitelist_count = gv11b_get_qctl_whitelist_count, | ||
488 | .get_qctl_whitelist_ranges = gv11b_get_qctl_whitelist_ranges, | ||
489 | .get_qctl_whitelist_ranges_count = | ||
490 | gv11b_get_qctl_whitelist_ranges_count, | ||
491 | .apply_smpc_war = gv11b_apply_smpc_war, | ||
492 | }, | ||
493 | .mc = { | ||
494 | .intr_enable = mc_gv11b_intr_enable, | ||
495 | .intr_unit_config = mc_gp10b_intr_unit_config, | ||
496 | .isr_stall = mc_gp10b_isr_stall, | ||
497 | .intr_stall = mc_gp10b_intr_stall, | ||
498 | .intr_stall_pause = mc_gp10b_intr_stall_pause, | ||
499 | .intr_stall_resume = mc_gp10b_intr_stall_resume, | ||
500 | .intr_nonstall = mc_gp10b_intr_nonstall, | ||
501 | .intr_nonstall_pause = mc_gp10b_intr_nonstall_pause, | ||
502 | .intr_nonstall_resume = mc_gp10b_intr_nonstall_resume, | ||
503 | .enable = gk20a_mc_enable, | ||
504 | .disable = gk20a_mc_disable, | ||
505 | .reset = gk20a_mc_reset, | ||
506 | .boot_0 = gk20a_mc_boot_0, | ||
507 | .is_intr1_pending = mc_gp10b_is_intr1_pending, | ||
508 | .is_intr_hub_pending = gv11b_mc_is_intr_hub_pending, | ||
509 | }, | ||
510 | .debug = { | ||
511 | .show_dump = NULL, | ||
512 | }, | ||
513 | .dbg_session_ops = { | ||
514 | .exec_reg_ops = vgpu_exec_regops, | ||
515 | .dbg_set_powergate = vgpu_dbg_set_powergate, | ||
516 | .check_and_set_global_reservation = | ||
517 | vgpu_check_and_set_global_reservation, | ||
518 | .check_and_set_context_reservation = | ||
519 | vgpu_check_and_set_context_reservation, | ||
520 | .release_profiler_reservation = | ||
521 | vgpu_release_profiler_reservation, | ||
522 | .perfbuffer_enable = vgpu_perfbuffer_enable, | ||
523 | .perfbuffer_disable = vgpu_perfbuffer_disable, | ||
524 | }, | ||
525 | .bus = { | ||
526 | .init_hw = gk20a_bus_init_hw, | ||
527 | .isr = gk20a_bus_isr, | ||
528 | .read_ptimer = vgpu_read_ptimer, | ||
529 | .get_timestamps_zipper = vgpu_get_timestamps_zipper, | ||
530 | .bar1_bind = NULL, | ||
531 | }, | ||
532 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
533 | .css = { | ||
534 | .enable_snapshot = vgpu_css_enable_snapshot_buffer, | ||
535 | .disable_snapshot = vgpu_css_release_snapshot_buffer, | ||
536 | .check_data_available = vgpu_css_flush_snapshots, | ||
537 | .set_handled_snapshots = NULL, | ||
538 | .allocate_perfmon_ids = NULL, | ||
539 | .release_perfmon_ids = NULL, | ||
540 | }, | ||
541 | #endif | ||
542 | .falcon = { | ||
543 | .falcon_hal_sw_init = gk20a_falcon_hal_sw_init, | ||
544 | }, | ||
545 | .priv_ring = { | ||
546 | .isr = gp10b_priv_ring_isr, | ||
547 | }, | ||
548 | .chip_init_gpu_characteristics = vgpu_gv11b_init_gpu_characteristics, | ||
549 | .get_litter_value = gv11b_get_litter_value, | ||
550 | }; | ||
551 | |||
552 | int vgpu_gv11b_init_hal(struct gk20a *g) | ||
553 | { | ||
554 | struct gpu_ops *gops = &g->ops; | ||
555 | u32 val; | ||
556 | bool priv_security; | ||
557 | |||
558 | gops->ltc = vgpu_gv11b_ops.ltc; | ||
559 | gops->ce2 = vgpu_gv11b_ops.ce2; | ||
560 | gops->gr = vgpu_gv11b_ops.gr; | ||
561 | gops->fb = vgpu_gv11b_ops.fb; | ||
562 | gops->clock_gating = vgpu_gv11b_ops.clock_gating; | ||
563 | gops->fifo = vgpu_gv11b_ops.fifo; | ||
564 | gops->gr_ctx = vgpu_gv11b_ops.gr_ctx; | ||
565 | gops->mm = vgpu_gv11b_ops.mm; | ||
566 | gops->fecs_trace = vgpu_gv11b_ops.fecs_trace; | ||
567 | gops->therm = vgpu_gv11b_ops.therm; | ||
568 | gops->pmu = vgpu_gv11b_ops.pmu; | ||
569 | gops->regops = vgpu_gv11b_ops.regops; | ||
570 | gops->mc = vgpu_gv11b_ops.mc; | ||
571 | gops->debug = vgpu_gv11b_ops.debug; | ||
572 | gops->dbg_session_ops = vgpu_gv11b_ops.dbg_session_ops; | ||
573 | gops->bus = vgpu_gv11b_ops.bus; | ||
574 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
575 | gops->css = vgpu_gv11b_ops.css; | ||
576 | #endif | ||
577 | gops->falcon = vgpu_gv11b_ops.falcon; | ||
578 | gops->priv_ring = vgpu_gv11b_ops.priv_ring; | ||
579 | |||
580 | /* Lone functions */ | ||
581 | gops->chip_init_gpu_characteristics = | ||
582 | vgpu_gv11b_ops.chip_init_gpu_characteristics; | ||
583 | gops->get_litter_value = vgpu_gv11b_ops.get_litter_value; | ||
584 | |||
585 | val = gk20a_readl(g, fuse_opt_priv_sec_en_r()); | ||
586 | if (val) { | ||
587 | priv_security = true; | ||
588 | pr_err("priv security is enabled\n"); | ||
589 | } else { | ||
590 | priv_security = false; | ||
591 | pr_err("priv security is disabled\n"); | ||
592 | } | ||
593 | __nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, false); | ||
594 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, priv_security); | ||
595 | __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, priv_security); | ||
596 | |||
597 | /* priv security dependent ops */ | ||
598 | if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { | ||
599 | /* Add in ops from gm20b acr */ | ||
600 | gops->pmu.prepare_ucode = gp106_prepare_ucode_blob, | ||
601 | gops->pmu.pmu_setup_hw_and_bootstrap = gv11b_bootstrap_hs_flcn, | ||
602 | gops->pmu.get_wpr = gm20b_wpr_info, | ||
603 | gops->pmu.alloc_blob_space = gm20b_alloc_blob_space, | ||
604 | gops->pmu.pmu_populate_loader_cfg = | ||
605 | gp106_pmu_populate_loader_cfg, | ||
606 | gops->pmu.flcn_populate_bl_dmem_desc = | ||
607 | gp106_flcn_populate_bl_dmem_desc, | ||
608 | gops->pmu.falcon_wait_for_halt = pmu_wait_for_halt, | ||
609 | gops->pmu.falcon_clear_halt_interrupt_status = | ||
610 | clear_halt_interrupt_status, | ||
611 | gops->pmu.init_falcon_setup_hw = gv11b_init_pmu_setup_hw1, | ||
612 | |||
613 | gops->pmu.init_wpr_region = gm20b_pmu_init_acr; | ||
614 | gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode; | ||
615 | gops->pmu.is_lazy_bootstrap = gv11b_is_lazy_bootstrap, | ||
616 | gops->pmu.is_priv_load = gv11b_is_priv_load, | ||
617 | |||
618 | gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode; | ||
619 | } else { | ||
620 | /* Inherit from gk20a */ | ||
621 | gops->pmu.prepare_ucode = nvgpu_pmu_prepare_ns_ucode_blob, | ||
622 | gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1, | ||
623 | |||
624 | gops->pmu.load_lsfalcon_ucode = NULL; | ||
625 | gops->pmu.init_wpr_region = NULL; | ||
626 | gops->pmu.pmu_setup_hw_and_bootstrap = gp10b_init_pmu_setup_hw1; | ||
627 | |||
628 | gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; | ||
629 | } | ||
630 | |||
631 | __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); | ||
632 | g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; | ||
633 | |||
634 | g->name = "gv11b"; | ||
635 | |||
636 | return 0; | ||
637 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c new file mode 100644 index 00000000..6d8785e4 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c | |||
@@ -0,0 +1,73 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include "gk20a/gk20a.h" | ||
18 | #include "common/linux/vgpu/vgpu.h" | ||
19 | #include <linux/tegra_vgpu.h> | ||
20 | |||
21 | int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) | ||
22 | { | ||
23 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | ||
24 | struct tegra_vgpu_cmd_msg msg = {}; | ||
25 | struct tegra_vgpu_alloc_ctx_header_params *p = | ||
26 | &msg.params.t19x.alloc_ctx_header; | ||
27 | struct gr_gk20a *gr = &c->g->gr; | ||
28 | int err; | ||
29 | |||
30 | msg.cmd = TEGRA_VGPU_CMD_ALLOC_CTX_HEADER; | ||
31 | msg.handle = vgpu_get_handle(c->g); | ||
32 | p->ch_handle = c->virt_ctx; | ||
33 | p->ctx_header_va = __nvgpu_vm_alloc_va(c->vm, | ||
34 | gr->ctx_vars.golden_image_size, | ||
35 | gmmu_page_size_kernel); | ||
36 | if (!p->ctx_header_va) { | ||
37 | nvgpu_err(c->g, "alloc va failed for ctx_header"); | ||
38 | return -ENOMEM; | ||
39 | } | ||
40 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
41 | err = err ? err : msg.ret; | ||
42 | if (unlikely(err)) { | ||
43 | nvgpu_err(c->g, "alloc ctx_header failed err %d", err); | ||
44 | __nvgpu_vm_free_va(c->vm, p->ctx_header_va, | ||
45 | gmmu_page_size_kernel); | ||
46 | return err; | ||
47 | } | ||
48 | ctx->mem.gpu_va = p->ctx_header_va; | ||
49 | |||
50 | return err; | ||
51 | } | ||
52 | |||
53 | void vgpu_gv11b_free_subctx_header(struct channel_gk20a *c) | ||
54 | { | ||
55 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | ||
56 | struct tegra_vgpu_cmd_msg msg = {}; | ||
57 | struct tegra_vgpu_free_ctx_header_params *p = | ||
58 | &msg.params.t19x.free_ctx_header; | ||
59 | int err; | ||
60 | |||
61 | if (ctx->mem.gpu_va) { | ||
62 | msg.cmd = TEGRA_VGPU_CMD_FREE_CTX_HEADER; | ||
63 | msg.handle = vgpu_get_handle(c->g); | ||
64 | p->ch_handle = c->virt_ctx; | ||
65 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
66 | err = err ? err : msg.ret; | ||
67 | if (unlikely(err)) | ||
68 | nvgpu_err(c->g, "free ctx_header failed err %d", err); | ||
69 | __nvgpu_vm_free_va(c->vm, ctx->mem.gpu_va, | ||
70 | gmmu_page_size_kernel); | ||
71 | ctx->mem.gpu_va = 0; | ||
72 | } | ||
73 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.h new file mode 100644 index 00000000..dfd7109e --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.h | |||
@@ -0,0 +1,25 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
#ifndef _VGPU_SUBCTX_GV11B_H_
#define _VGPU_SUBCTX_GV11B_H_

struct channel_gk20a;

/* Allocate the gv11b subcontext header for @c via the RM server.
 * Returns 0 on success or a negative error code. */
int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c);
/* Free @c's subcontext header; no-op if none was allocated. */
void vgpu_gv11b_free_subctx_header(struct channel_gk20a *c);

#endif
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_tsg_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_tsg_gv11b.c new file mode 100644 index 00000000..094ccc44 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_tsg_gv11b.c | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/tegra_vgpu.h> | ||
18 | #include "gk20a/gk20a.h" | ||
19 | #include "common/linux/vgpu/vgpu.h" | ||
20 | |||
21 | #include "vgpu_tsg_gv11b.h" | ||
22 | |||
23 | int vgpu_gv11b_tsg_bind_channel(struct tsg_gk20a *tsg, | ||
24 | struct channel_gk20a *ch) | ||
25 | { | ||
26 | struct tegra_vgpu_cmd_msg msg = {}; | ||
27 | struct tegra_vgpu_tsg_bind_channel_ex_params *p = | ||
28 | &msg.params.t19x.tsg_bind_channel_ex; | ||
29 | int err; | ||
30 | |||
31 | gk20a_dbg_fn(""); | ||
32 | |||
33 | err = gk20a_tsg_bind_channel(tsg, ch); | ||
34 | if (err) | ||
35 | return err; | ||
36 | |||
37 | msg.cmd = TEGRA_VGPU_CMD_TSG_BIND_CHANNEL_EX; | ||
38 | msg.handle = vgpu_get_handle(tsg->g); | ||
39 | p->tsg_id = tsg->tsgid; | ||
40 | p->ch_handle = ch->virt_ctx; | ||
41 | p->subctx_id = ch->t19x.subctx_id; | ||
42 | p->runqueue_sel = ch->t19x.runqueue_sel; | ||
43 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
44 | err = err ? err : msg.ret; | ||
45 | if (err) { | ||
46 | nvgpu_err(tsg->g, | ||
47 | "vgpu_gv11b_tsg_bind_channel failed, ch %d tsgid %d", | ||
48 | ch->chid, tsg->tsgid); | ||
49 | gk20a_tsg_unbind_channel(ch); | ||
50 | } | ||
51 | |||
52 | return err; | ||
53 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_tsg_gv11b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_tsg_gv11b.h new file mode 100644 index 00000000..6334cdbb --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_tsg_gv11b.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
#ifndef _VGPU_TSG_GV11B_H_
#define _VGPU_TSG_GV11B_H_

/* Bind @ch to @tsg locally and on the RM server (gv11b subctx-aware).
 * Returns 0 on success or a negative error code; the local bind is
 * rolled back if the server bind fails. */
int vgpu_gv11b_tsg_bind_channel(struct tsg_gk20a *tsg,
		struct channel_gk20a *ch);

#endif
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/ltc_vgpu.c new file mode 100644 index 00000000..627ad1a8 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/ltc_vgpu.c | |||
@@ -0,0 +1,61 @@ | |||
1 | /* | ||
2 | * Virtualized GPU L2 | ||
3 | * | ||
4 | * Copyright (c) 2014-2017 NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include "vgpu.h" | ||
20 | #include "ltc_vgpu.h" | ||
21 | |||
22 | int vgpu_determine_L2_size_bytes(struct gk20a *g) | ||
23 | { | ||
24 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
25 | |||
26 | gk20a_dbg_fn(""); | ||
27 | |||
28 | return priv->constants.l2_size; | ||
29 | } | ||
30 | |||
31 | int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | ||
32 | { | ||
33 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
34 | u32 max_comptag_lines = 0; | ||
35 | int err; | ||
36 | |||
37 | gk20a_dbg_fn(""); | ||
38 | |||
39 | gr->cacheline_size = priv->constants.cacheline_size; | ||
40 | gr->comptags_per_cacheline = priv->constants.comptags_per_cacheline; | ||
41 | gr->slices_per_ltc = priv->constants.slices_per_ltc; | ||
42 | max_comptag_lines = priv->constants.comptag_lines; | ||
43 | |||
44 | if (max_comptag_lines < 2) | ||
45 | return -ENXIO; | ||
46 | |||
47 | err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); | ||
48 | if (err) | ||
49 | return err; | ||
50 | |||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | void vgpu_ltc_init_fs_state(struct gk20a *g) | ||
55 | { | ||
56 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
57 | |||
58 | gk20a_dbg_fn(""); | ||
59 | |||
60 | g->ltc_count = priv->constants.ltc_count; | ||
61 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/ltc_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/ltc_vgpu.h new file mode 100644 index 00000000..7b368ef5 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/ltc_vgpu.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
#ifndef _LTC_VGPU_H_
#define _LTC_VGPU_H_

struct gk20a;
struct gr_gk20a;

/* L2 size in bytes, as published by the RM server. */
int vgpu_determine_L2_size_bytes(struct gk20a *g);
/* Set up the comptag allocator from server constants; 0 or -errno. */
int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr);
/* Record the server-provided LTC count in @g. */
void vgpu_ltc_init_fs_state(struct gk20a *g);

#endif
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/mm_vgpu.c new file mode 100644 index 00000000..f8c5c406 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/mm_vgpu.c | |||
@@ -0,0 +1,363 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Memory Management | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/dma-mapping.h> | ||
20 | #include <uapi/linux/nvgpu.h> | ||
21 | |||
22 | #include <nvgpu/kmem.h> | ||
23 | #include <nvgpu/dma.h> | ||
24 | #include <nvgpu/bug.h> | ||
25 | #include <nvgpu/vm.h> | ||
26 | #include <nvgpu/vm_area.h> | ||
27 | |||
28 | #include <nvgpu/vgpu/vm.h> | ||
29 | |||
30 | #include <nvgpu/linux/vm.h> | ||
31 | #include <nvgpu/linux/nvgpu_mem.h> | ||
32 | |||
33 | #include "vgpu.h" | ||
34 | #include "mm_vgpu.h" | ||
35 | #include "gk20a/mm_gk20a.h" | ||
36 | #include "gm20b/mm_gm20b.h" | ||
37 | |||
38 | static int vgpu_init_mm_setup_sw(struct gk20a *g) | ||
39 | { | ||
40 | struct mm_gk20a *mm = &g->mm; | ||
41 | |||
42 | gk20a_dbg_fn(""); | ||
43 | |||
44 | if (mm->sw_ready) { | ||
45 | gk20a_dbg_fn("skip init"); | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | nvgpu_mutex_init(&mm->tlb_lock); | ||
50 | nvgpu_mutex_init(&mm->priv_lock); | ||
51 | |||
52 | mm->g = g; | ||
53 | |||
54 | /*TBD: make channel vm size configurable */ | ||
55 | mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE; | ||
56 | mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; | ||
57 | |||
58 | gk20a_dbg_info("channel vm size: user %dMB kernel %dMB", | ||
59 | (int)(mm->channel.user_size >> 20), | ||
60 | (int)(mm->channel.kernel_size >> 20)); | ||
61 | |||
62 | mm->sw_ready = true; | ||
63 | |||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | int vgpu_init_mm_support(struct gk20a *g) | ||
68 | { | ||
69 | int err; | ||
70 | |||
71 | gk20a_dbg_fn(""); | ||
72 | |||
73 | err = vgpu_init_mm_setup_sw(g); | ||
74 | if (err) | ||
75 | return err; | ||
76 | |||
77 | if (g->ops.mm.init_mm_setup_hw) | ||
78 | err = g->ops.mm.init_mm_setup_hw(g); | ||
79 | |||
80 | return err; | ||
81 | } | ||
82 | |||
83 | u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | ||
84 | u64 map_offset, | ||
85 | struct nvgpu_sgt *sgt, | ||
86 | u64 buffer_offset, | ||
87 | u64 size, | ||
88 | int pgsz_idx, | ||
89 | u8 kind_v, | ||
90 | u32 ctag_offset, | ||
91 | u32 flags, | ||
92 | int rw_flag, | ||
93 | bool clear_ctags, | ||
94 | bool sparse, | ||
95 | bool priv, | ||
96 | struct vm_gk20a_mapping_batch *batch, | ||
97 | enum nvgpu_aperture aperture) | ||
98 | { | ||
99 | int err = 0; | ||
100 | struct device *d = dev_from_vm(vm); | ||
101 | struct gk20a *g = gk20a_from_vm(vm); | ||
102 | struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); | ||
103 | struct tegra_vgpu_cmd_msg msg; | ||
104 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; | ||
105 | u64 addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, NULL); | ||
106 | u8 prot; | ||
107 | |||
108 | gk20a_dbg_fn(""); | ||
109 | |||
110 | /* Allocate (or validate when map_offset != 0) the virtual address. */ | ||
111 | if (!map_offset) { | ||
112 | map_offset = __nvgpu_vm_alloc_va(vm, size, | ||
113 | pgsz_idx); | ||
114 | if (!map_offset) { | ||
115 | nvgpu_err(g, "failed to allocate va space"); | ||
116 | err = -ENOMEM; | ||
117 | goto fail; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | if (rw_flag == gk20a_mem_flag_read_only) | ||
122 | prot = TEGRA_VGPU_MAP_PROT_READ_ONLY; | ||
123 | else if (rw_flag == gk20a_mem_flag_write_only) | ||
124 | prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY; | ||
125 | else | ||
126 | prot = TEGRA_VGPU_MAP_PROT_NONE; | ||
127 | |||
128 | msg.cmd = TEGRA_VGPU_CMD_AS_MAP; | ||
129 | msg.handle = vgpu_get_handle(g); | ||
130 | p->handle = vm->handle; | ||
131 | p->addr = addr; | ||
132 | p->gpu_va = map_offset; | ||
133 | p->size = size; | ||
134 | if (pgsz_idx == gmmu_page_size_kernel) { | ||
135 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | ||
136 | |||
137 | if (page_size == vm->gmmu_page_sizes[gmmu_page_size_small]) { | ||
138 | pgsz_idx = gmmu_page_size_small; | ||
139 | } else if (page_size == | ||
140 | vm->gmmu_page_sizes[gmmu_page_size_big]) { | ||
141 | pgsz_idx = gmmu_page_size_big; | ||
142 | } else { | ||
143 | nvgpu_err(g, "invalid kernel page size %d", | ||
144 | page_size); | ||
145 | goto fail; | ||
146 | } | ||
147 | } | ||
148 | p->pgsz_idx = pgsz_idx; | ||
149 | p->iova = mapping ? 1 : 0; | ||
150 | p->kind = kind_v; | ||
151 | p->cacheable = (flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE) ? 1 : 0; | ||
152 | p->prot = prot; | ||
153 | p->ctag_offset = ctag_offset; | ||
154 | p->clear_ctags = clear_ctags; | ||
155 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
156 | err = err ? err : msg.ret; | ||
157 | if (err) | ||
158 | goto fail; | ||
159 | |||
160 | /* TLB invalidate handled on server side */ | ||
161 | |||
162 | return map_offset; | ||
163 | fail: | ||
164 | nvgpu_err(g, "%s: failed with err=%d", __func__, err); | ||
165 | return 0; | ||
166 | } | ||
167 | |||
168 | void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm, | ||
169 | u64 vaddr, | ||
170 | u64 size, | ||
171 | int pgsz_idx, | ||
172 | bool va_allocated, | ||
173 | int rw_flag, | ||
174 | bool sparse, | ||
175 | struct vm_gk20a_mapping_batch *batch) | ||
176 | { | ||
177 | struct gk20a *g = gk20a_from_vm(vm); | ||
178 | struct tegra_vgpu_cmd_msg msg; | ||
179 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; | ||
180 | int err; | ||
181 | |||
182 | gk20a_dbg_fn(""); | ||
183 | |||
184 | if (va_allocated) { | ||
185 | err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx); | ||
186 | if (err) { | ||
187 | dev_err(dev_from_vm(vm), | ||
188 | "failed to free va"); | ||
189 | return; | ||
190 | } | ||
191 | } | ||
192 | |||
193 | msg.cmd = TEGRA_VGPU_CMD_AS_UNMAP; | ||
194 | msg.handle = vgpu_get_handle(g); | ||
195 | p->handle = vm->handle; | ||
196 | p->gpu_va = vaddr; | ||
197 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
198 | if (err || msg.ret) | ||
199 | dev_err(dev_from_vm(vm), | ||
200 | "failed to update gmmu ptes on unmap"); | ||
201 | |||
202 | /* TLB invalidate handled on server side */ | ||
203 | } | ||
204 | |||
205 | /* | ||
206 | * This is called by the common VM init routine to handle vGPU specifics of | ||
207 | * intializing a VM on a vGPU. This alone is not enough to init a VM. See | ||
208 | * nvgpu_vm_init(). | ||
209 | */ | ||
210 | int vgpu_vm_init(struct gk20a *g, struct vm_gk20a *vm) | ||
211 | { | ||
212 | struct tegra_vgpu_cmd_msg msg; | ||
213 | struct tegra_vgpu_as_share_params *p = &msg.params.as_share; | ||
214 | int err; | ||
215 | |||
216 | msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE; | ||
217 | msg.handle = vgpu_get_handle(g); | ||
218 | p->size = vm->va_limit; | ||
219 | p->big_page_size = vm->big_page_size; | ||
220 | |||
221 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
222 | if (err || msg.ret) | ||
223 | return -ENOMEM; | ||
224 | |||
225 | vm->handle = p->handle; | ||
226 | |||
227 | return 0; | ||
228 | } | ||
229 | |||
230 | /* | ||
231 | * Similar to vgpu_vm_init() this is called as part of the cleanup path for | ||
232 | * VMs. This alone is not enough to remove a VM - see nvgpu_vm_remove(). | ||
233 | */ | ||
234 | void vgpu_vm_remove(struct vm_gk20a *vm) | ||
235 | { | ||
236 | struct gk20a *g = gk20a_from_vm(vm); | ||
237 | struct tegra_vgpu_cmd_msg msg; | ||
238 | struct tegra_vgpu_as_share_params *p = &msg.params.as_share; | ||
239 | int err; | ||
240 | |||
241 | msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE; | ||
242 | msg.handle = vgpu_get_handle(g); | ||
243 | p->handle = vm->handle; | ||
244 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
245 | WARN_ON(err || msg.ret); | ||
246 | } | ||
247 | |||
248 | u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size) | ||
249 | { | ||
250 | struct dma_iommu_mapping *mapping = | ||
251 | to_dma_iommu_mapping(dev_from_gk20a(g)); | ||
252 | u64 addr = nvgpu_mem_get_addr_sgl(g, (*sgt)->sgl); | ||
253 | struct tegra_vgpu_cmd_msg msg; | ||
254 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; | ||
255 | int err; | ||
256 | |||
257 | msg.cmd = TEGRA_VGPU_CMD_MAP_BAR1; | ||
258 | msg.handle = vgpu_get_handle(g); | ||
259 | p->addr = addr; | ||
260 | p->size = size; | ||
261 | p->iova = mapping ? 1 : 0; | ||
262 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
263 | if (err || msg.ret) | ||
264 | addr = 0; | ||
265 | else | ||
266 | addr = p->gpu_va; | ||
267 | |||
268 | return addr; | ||
269 | } | ||
270 | |||
271 | int vgpu_vm_bind_channel(struct gk20a_as_share *as_share, | ||
272 | struct channel_gk20a *ch) | ||
273 | { | ||
274 | struct vm_gk20a *vm = as_share->vm; | ||
275 | struct tegra_vgpu_cmd_msg msg; | ||
276 | struct tegra_vgpu_as_bind_share_params *p = &msg.params.as_bind_share; | ||
277 | int err; | ||
278 | |||
279 | gk20a_dbg_fn(""); | ||
280 | |||
281 | ch->vm = vm; | ||
282 | msg.cmd = TEGRA_VGPU_CMD_AS_BIND_SHARE; | ||
283 | msg.handle = vgpu_get_handle(ch->g); | ||
284 | p->as_handle = vm->handle; | ||
285 | p->chan_handle = ch->virt_ctx; | ||
286 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
287 | |||
288 | if (err || msg.ret) { | ||
289 | ch->vm = NULL; | ||
290 | err = -ENOMEM; | ||
291 | } | ||
292 | |||
293 | if (ch->vm) | ||
294 | nvgpu_vm_get(ch->vm); | ||
295 | |||
296 | return err; | ||
297 | } | ||
298 | |||
299 | static void vgpu_cache_maint(u64 handle, u8 op) | ||
300 | { | ||
301 | struct tegra_vgpu_cmd_msg msg; | ||
302 | struct tegra_vgpu_cache_maint_params *p = &msg.params.cache_maint; | ||
303 | int err; | ||
304 | |||
305 | msg.cmd = TEGRA_VGPU_CMD_CACHE_MAINT; | ||
306 | msg.handle = handle; | ||
307 | p->op = op; | ||
308 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
309 | WARN_ON(err || msg.ret); | ||
310 | } | ||
311 | |||
312 | int vgpu_mm_fb_flush(struct gk20a *g) | ||
313 | { | ||
314 | |||
315 | gk20a_dbg_fn(""); | ||
316 | |||
317 | vgpu_cache_maint(vgpu_get_handle(g), TEGRA_VGPU_FB_FLUSH); | ||
318 | return 0; | ||
319 | } | ||
320 | |||
321 | void vgpu_mm_l2_invalidate(struct gk20a *g) | ||
322 | { | ||
323 | |||
324 | gk20a_dbg_fn(""); | ||
325 | |||
326 | vgpu_cache_maint(vgpu_get_handle(g), TEGRA_VGPU_L2_MAINT_INV); | ||
327 | } | ||
328 | |||
329 | void vgpu_mm_l2_flush(struct gk20a *g, bool invalidate) | ||
330 | { | ||
331 | u8 op; | ||
332 | |||
333 | gk20a_dbg_fn(""); | ||
334 | |||
335 | if (invalidate) | ||
336 | op = TEGRA_VGPU_L2_MAINT_FLUSH_INV; | ||
337 | else | ||
338 | op = TEGRA_VGPU_L2_MAINT_FLUSH; | ||
339 | |||
340 | vgpu_cache_maint(vgpu_get_handle(g), op); | ||
341 | } | ||
342 | |||
/* TLB invalidation is owned by the RM server on vGPU; a direct call
 * from the guest is unsupported and only logged. */
void vgpu_mm_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb)
{
	gk20a_dbg_fn("");

	nvgpu_err(g, "call to RM server not supported");
}
349 | |||
350 | void vgpu_mm_mmu_set_debug_mode(struct gk20a *g, bool enable) | ||
351 | { | ||
352 | struct tegra_vgpu_cmd_msg msg; | ||
353 | struct tegra_vgpu_mmu_debug_mode *p = &msg.params.mmu_debug_mode; | ||
354 | int err; | ||
355 | |||
356 | gk20a_dbg_fn(""); | ||
357 | |||
358 | msg.cmd = TEGRA_VGPU_CMD_SET_MMU_DEBUG_MODE; | ||
359 | msg.handle = vgpu_get_handle(g); | ||
360 | p->enable = (u32)enable; | ||
361 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
362 | WARN_ON(err || msg.ret); | ||
363 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/mm_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/mm_vgpu.h new file mode 100644 index 00000000..eee54779 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/mm_vgpu.h | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
#ifndef _MM_VGPU_H_
#define _MM_VGPU_H_

/* Map a buffer into @vm via the RM server; returns the GPU VA or 0 on
 * failure.  A zero @map_offset means "allocate a VA here". */
u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
			u64 map_offset,
			struct nvgpu_sgt *sgt,
			u64 buffer_offset,
			u64 size,
			int pgsz_idx,
			u8 kind_v,
			u32 ctag_offset,
			u32 flags,
			int rw_flag,
			bool clear_ctags,
			bool sparse,
			bool priv,
			struct vm_gk20a_mapping_batch *batch,
			enum nvgpu_aperture aperture);
/* Unmap a GPU VA range via the RM server; frees the local VA
 * reservation when @va_allocated is true. */
void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
			u64 vaddr,
			u64 size,
			int pgsz_idx,
			bool va_allocated,
			int rw_flag,
			bool sparse,
			struct vm_gk20a_mapping_batch *batch);
/* Bind @ch to the VM of @as_share on the RM server; 0 or -ENOMEM. */
int vgpu_vm_bind_channel(struct gk20a_as_share *as_share,
			struct channel_gk20a *ch);
/* Cache/TLB maintenance, all forwarded to the RM server. */
int vgpu_mm_fb_flush(struct gk20a *g);
void vgpu_mm_l2_invalidate(struct gk20a *g);
void vgpu_mm_l2_flush(struct gk20a *g, bool invalidate);
/* Unsupported on vGPU; logs an error (server owns TLB maintenance). */
void vgpu_mm_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb);
void vgpu_mm_mmu_set_debug_mode(struct gk20a *g, bool enable);
#endif
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c new file mode 100644 index 00000000..830b04ac --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c | |||
@@ -0,0 +1,69 @@ | |||
1 | /* | ||
2 | * Tegra Virtualized GPU Platform Interface | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include "gk20a/gk20a.h" | ||
20 | #include "common/linux/platform_gk20a.h" | ||
21 | #include "clk_vgpu.h" | ||
22 | |||
23 | #include <nvgpu/nvhost.h> | ||
24 | |||
/*
 * Platform probe hook for the virtual GPU on Tegra.
 *
 * With CONFIG_TEGRA_GK20A_NVHOST set, look up the nvhost device and wire
 * up vgpu clock support; otherwise this is a no-op returning success.
 */
static int gk20a_tegra_probe(struct device *dev)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	int err;

	err = nvgpu_get_nvhost_dev(platform->g);
	if (err)
		return err;

	vgpu_init_clk_support(platform->g);
#endif
	return 0;
}
41 | |||
/*
 * Platform description for the Tegra virtual GPU. All railgating, power
 * gating (ELPG/AELPG) and clock gating (SLCG/BLCG/ELCG) features are
 * disabled: the guest does not own the physical power controls, the vgpu
 * server side does.
 */
struct gk20a_platform vgpu_tegra_platform = {
	.has_syncpoints = true,
	.aggressive_sync_destroy_thresh = 64,

	/* power management configuration */
	.can_railgate_init = false,
	.can_elpg_init = false,
	.enable_slcg = false,
	.enable_blcg = false,
	.enable_elcg = false,
	.enable_elpg = false,
	.enable_aelpg = false,
	.can_slcg = false,
	.can_blcg = false,
	.can_elcg = false,

	/* channel watchdog timeout, milliseconds */
	.ch_wdt_timeout_ms = 5000,

	.probe = gk20a_tegra_probe,

	/* clock queries are backed by RPCs to the vgpu server */
	.clk_round_rate = vgpu_clk_round_rate,
	.get_clk_freqs = vgpu_clk_get_freqs,

	/* frequency scaling configuration */
	.devfreq_governor = "userspace",

	.virtual_dev = true,
};
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c new file mode 100644 index 00000000..4025aabd --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c | |||
@@ -0,0 +1,49 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/device.h> | ||
18 | |||
19 | #include "vgpu.h" | ||
20 | |||
/*
 * sysfs "load" show handler: query the current GPU load from the vgpu
 * server and print it as an unsigned integer. Returns a negative error if
 * the RPC transport fails.
 *
 * NOTE(review): msg.ret (server-side status) is not checked here, so a
 * server-side failure would print whatever is in p->load — confirm
 * intended.
 */
static ssize_t vgpu_load_show(struct device *dev,
			struct device_attribute *attr,
			char *buf)
{
	struct gk20a *g = get_gk20a(dev);
	struct tegra_vgpu_cmd_msg msg = {0};
	struct tegra_vgpu_gpu_load_params *p = &msg.params.gpu_load;
	int err;

	msg.cmd = TEGRA_VGPU_CMD_GET_GPU_LOAD;
	msg.handle = vgpu_get_handle(g);
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	if (err)
		return err;

	return snprintf(buf, PAGE_SIZE, "%u\n", p->load);
}
/* read-only attribute: /sys/.../load */
static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL);
39 | |||
40 | void vgpu_create_sysfs(struct device *dev) | ||
41 | { | ||
42 | if (device_create_file(dev, &dev_attr_load)) | ||
43 | dev_err(dev, "Failed to create vgpu sysfs attributes!\n"); | ||
44 | } | ||
45 | |||
/* Remove the attributes created by vgpu_create_sysfs(). */
void vgpu_remove_sysfs(struct device *dev)
{
	device_remove_file(dev, &dev_attr_load);
}
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/tsg_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/tsg_vgpu.c new file mode 100644 index 00000000..c40e6f90 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/tsg_vgpu.c | |||
@@ -0,0 +1,136 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/tegra_vgpu.h> | ||
18 | |||
19 | #include "gk20a/gk20a.h" | ||
20 | #include "gk20a/channel_gk20a.h" | ||
21 | #include "gk20a/tsg_gk20a.h" | ||
22 | #include "common/linux/platform_gk20a.h" | ||
23 | #include "vgpu.h" | ||
24 | #include "fifo_vgpu.h" | ||
25 | |||
26 | #include <nvgpu/bug.h> | ||
27 | |||
/*
 * Ask the vgpu server to open a TSG with the guest-allocated tsgid.
 * Returns 0 on success, otherwise a negative transport error or the
 * server's msg.ret status.
 */
int vgpu_tsg_open(struct tsg_gk20a *tsg)
{
	struct tegra_vgpu_cmd_msg msg = {};
	struct tegra_vgpu_tsg_open_params *p =
		&msg.params.tsg_open;
	int err;

	gk20a_dbg_fn("");

	msg.cmd = TEGRA_VGPU_CMD_TSG_OPEN;
	msg.handle = vgpu_get_handle(tsg->g);
	p->tsg_id = tsg->tsgid;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	/* transport error wins; otherwise use the server-side status */
	err = err ? err : msg.ret;
	if (err) {
		nvgpu_err(tsg->g,
			"vgpu_tsg_open failed, tsgid %d", tsg->tsgid);
	}

	return err;
}
49 | |||
/*
 * Enable every channel currently bound to the TSG, using the per-channel
 * enable HAL, under the TSG's channel-list read lock. Guest-local only —
 * no RPC is issued here. Always returns 0.
 */
int vgpu_enable_tsg(struct tsg_gk20a *tsg)
{
	struct gk20a *g = tsg->g;
	struct channel_gk20a *ch;

	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
	nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry)
		g->ops.fifo.enable_channel(ch);
	nvgpu_rwsem_up_read(&tsg->ch_list_lock);

	return 0;
}
62 | |||
/*
 * Bind a channel to a TSG: do the guest-side bookkeeping first via
 * gk20a_tsg_bind_channel(), then mirror the bind on the vgpu server.
 * If the server rejects it, the local bind is rolled back so guest and
 * server state stay consistent.
 */
int vgpu_tsg_bind_channel(struct tsg_gk20a *tsg,
			struct channel_gk20a *ch)
{
	struct tegra_vgpu_cmd_msg msg = {};
	struct tegra_vgpu_tsg_bind_unbind_channel_params *p =
		&msg.params.tsg_bind_unbind_channel;
	int err;

	gk20a_dbg_fn("");

	err = gk20a_tsg_bind_channel(tsg, ch);
	if (err)
		return err;

	msg.cmd = TEGRA_VGPU_CMD_TSG_BIND_CHANNEL;
	msg.handle = vgpu_get_handle(tsg->g);
	p->tsg_id = tsg->tsgid;
	p->ch_handle = ch->virt_ctx;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	err = err ? err : msg.ret;
	if (err) {
		nvgpu_err(tsg->g,
			"vgpu_tsg_bind_channel failed, ch %d tsgid %d",
			ch->chid, tsg->tsgid);
		/* undo the local bind to match the server's view */
		gk20a_tsg_unbind_channel(ch);
	}

	return err;
}
92 | |||
/*
 * Unbind a channel from its TSG: local unbind first, then notify the
 * server. A server-side failure is warned about (WARN_ON) and returned,
 * but by then the guest-side state has already been torn down.
 */
int vgpu_tsg_unbind_channel(struct channel_gk20a *ch)
{
	struct tegra_vgpu_cmd_msg msg = {};
	struct tegra_vgpu_tsg_bind_unbind_channel_params *p =
		&msg.params.tsg_bind_unbind_channel;
	int err;

	gk20a_dbg_fn("");

	err = gk20a_tsg_unbind_channel(ch);
	if (err)
		return err;

	msg.cmd = TEGRA_VGPU_CMD_TSG_UNBIND_CHANNEL;
	msg.handle = vgpu_get_handle(ch->g);
	p->ch_handle = ch->virt_ctx;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	err = err ? err : msg.ret;
	WARN_ON(err);

	return err;
}
115 | |||
/*
 * Set the TSG timeslice (microseconds) via the vgpu server. On success
 * the new value is cached in tsg->timeslice_us so later queries reflect
 * it; on failure the cached value is left unchanged and a WARN is raised.
 */
int vgpu_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
{
	struct tegra_vgpu_cmd_msg msg = {0};
	struct tegra_vgpu_tsg_timeslice_params *p =
		&msg.params.tsg_timeslice;
	int err;

	gk20a_dbg_fn("");

	msg.cmd = TEGRA_VGPU_CMD_TSG_SET_TIMESLICE;
	msg.handle = vgpu_get_handle(tsg->g);
	p->tsg_id = tsg->tsgid;
	p->timeslice_us = timeslice;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	err = err ? err : msg.ret;
	WARN_ON(err);
	if (!err)
		tsg->timeslice_us = timeslice;

	return err;
}
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.c new file mode 100644 index 00000000..7768b21d --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.c | |||
@@ -0,0 +1,776 @@ | |||
1 | /* | ||
2 | * Virtualized GPU | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/delay.h> | ||
20 | #include <linux/dma-mapping.h> | ||
21 | #include <linux/pm_runtime.h> | ||
22 | #include <linux/pm_qos.h> | ||
23 | #include <soc/tegra/chip-id.h> | ||
24 | #include <uapi/linux/nvgpu.h> | ||
25 | |||
26 | #include <nvgpu/kmem.h> | ||
27 | #include <nvgpu/bug.h> | ||
28 | #include <nvgpu/enabled.h> | ||
29 | #include <nvgpu/debug.h> | ||
30 | #include <nvgpu/bus.h> | ||
31 | #include <nvgpu/soc.h> | ||
32 | #include <nvgpu/ctxsw_trace.h> | ||
33 | |||
34 | #include "vgpu.h" | ||
35 | #include "fecs_trace_vgpu.h" | ||
36 | #include "clk_vgpu.h" | ||
37 | #include "gk20a/tsg_gk20a.h" | ||
38 | #include "gk20a/channel_gk20a.h" | ||
39 | #include "gm20b/hal_gm20b.h" | ||
40 | |||
41 | #include "common/linux/module.h" | ||
42 | #include "common/linux/os_linux.h" | ||
43 | #include "common/linux/ioctl.h" | ||
44 | #include "common/linux/scale.h" | ||
45 | #include "common/linux/driver_common.h" | ||
46 | |||
47 | #ifdef CONFIG_TEGRA_19x_GPU | ||
48 | #include "common/linux/vgpu/vgpu_t19x.h" | ||
49 | #include <nvgpu_gpuid_t19x.h> | ||
50 | #endif | ||
51 | |||
52 | #include <nvgpu/hw/gk20a/hw_mc_gk20a.h> | ||
53 | |||
54 | static inline int vgpu_comm_init(struct platform_device *pdev) | ||
55 | { | ||
56 | size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES }; | ||
57 | |||
58 | return tegra_gr_comm_init(pdev, TEGRA_GR_COMM_CTX_CLIENT, 3, | ||
59 | queue_sizes, TEGRA_VGPU_QUEUE_CMD, | ||
60 | ARRAY_SIZE(queue_sizes)); | ||
61 | } | ||
62 | |||
63 | static inline void vgpu_comm_deinit(void) | ||
64 | { | ||
65 | size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES }; | ||
66 | |||
67 | tegra_gr_comm_deinit(TEGRA_GR_COMM_CTX_CLIENT, TEGRA_VGPU_QUEUE_CMD, | ||
68 | ARRAY_SIZE(queue_sizes)); | ||
69 | } | ||
70 | |||
/*
 * Send a command message to the vgpu server and wait for the reply.
 * @msg is used in both directions: up to @size_out bytes of the reply are
 * copied back into it. Returns 0 on transport success; callers are
 * expected to also check msg->ret for the server-side status.
 */
int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in,
		size_t size_out)
{
	void *handle;
	size_t size = size_in;
	void *data = msg;
	int err;

	/* data/size are in-out: on return they describe the reply buffer */
	err = tegra_gr_comm_sendrecv(TEGRA_GR_COMM_CTX_CLIENT,
				tegra_gr_comm_get_server_vmid(),
				TEGRA_VGPU_QUEUE_CMD, &handle, &data, &size);
	if (!err) {
		WARN_ON(size < size_out);
		memcpy(msg, data, size_out);
		tegra_gr_comm_release(handle);
	}

	return err;
}
90 | |||
91 | static u64 vgpu_connect(void) | ||
92 | { | ||
93 | struct tegra_vgpu_cmd_msg msg; | ||
94 | struct tegra_vgpu_connect_params *p = &msg.params.connect; | ||
95 | int err; | ||
96 | |||
97 | msg.cmd = TEGRA_VGPU_CMD_CONNECT; | ||
98 | p->module = TEGRA_VGPU_MODULE_GPU; | ||
99 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
100 | |||
101 | return (err || msg.ret) ? 0 : p->handle; | ||
102 | } | ||
103 | |||
104 | int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value) | ||
105 | { | ||
106 | struct tegra_vgpu_cmd_msg msg; | ||
107 | struct tegra_vgpu_attrib_params *p = &msg.params.attrib; | ||
108 | int err; | ||
109 | |||
110 | msg.cmd = TEGRA_VGPU_CMD_GET_ATTRIBUTE; | ||
111 | msg.handle = handle; | ||
112 | p->attrib = attrib; | ||
113 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
114 | |||
115 | if (err || msg.ret) | ||
116 | return -1; | ||
117 | |||
118 | *value = p->value; | ||
119 | return 0; | ||
120 | } | ||
121 | |||
/*
 * Dispatch a channel/TSG event received from the vgpu server to the
 * matching event-id poster. The id and event_id come from an external
 * message, so both are validated before indexing the fifo tables.
 */
static void vgpu_handle_channel_event(struct gk20a *g,
			struct tegra_vgpu_channel_event_info *info)
{
	if (info->id >= g->fifo.num_channels ||
		info->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) {
		nvgpu_err(g, "invalid channel event");
		return;
	}

	if (info->is_tsg) {
		struct tsg_gk20a *tsg = &g->fifo.tsg[info->id];

		gk20a_tsg_event_id_post_event(tsg, info->event_id);
	} else {
		struct channel_gk20a *ch = &g->fifo.channel[info->id];

		/* take a reference so the channel can't go away while we
		 * post the event; a dead channel is reported and skipped */
		if (!gk20a_channel_get(ch)) {
			nvgpu_err(g, "invalid channel %d for event %d",
				(int)info->id, (int)info->event_id);
			return;
		}
		gk20a_channel_event_id_post_event(ch, info->event_id);
		gk20a_channel_put(ch);
	}
}
147 | |||
148 | |||
149 | |||
/*
 * Kernel thread draining the server-to-guest interrupt/event queue. This
 * replaces real interrupt handling for the virtual GPU: each received
 * message is demultiplexed to the matching stall/nonstall ISR or event
 * handler. The loop exits when TEGRA_VGPU_EVENT_ABORT arrives (posted by
 * vgpu_remove_support() to our own queue, or by the server).
 */
static int vgpu_intr_thread(void *dev_id)
{
	struct gk20a *g = dev_id;
	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);

	while (true) {
		struct tegra_vgpu_intr_msg *msg;
		u32 sender;
		void *handle;
		size_t size;
		int err;

		err = tegra_gr_comm_recv(TEGRA_GR_COMM_CTX_CLIENT,
					TEGRA_VGPU_QUEUE_INTR, &handle,
					(void **)&msg, &size, &sender);
		/* -ETIME is just a receive timeout; poll again */
		if (err == -ETIME)
			continue;
		if (WARN_ON(err))
			continue;

		if (msg->event == TEGRA_VGPU_EVENT_ABORT) {
			tegra_gr_comm_release(handle);
			break;
		}

		switch (msg->event) {
		case TEGRA_VGPU_EVENT_INTR:
			if (msg->unit == TEGRA_VGPU_INTR_GR)
				vgpu_gr_isr(g, &msg->info.gr_intr);
			else if (msg->unit == TEGRA_VGPU_NONSTALL_INTR_GR)
				vgpu_gr_nonstall_isr(g,
					&msg->info.gr_nonstall_intr);
			else if (msg->unit == TEGRA_VGPU_INTR_FIFO)
				vgpu_fifo_isr(g, &msg->info.fifo_intr);
			else if (msg->unit == TEGRA_VGPU_NONSTALL_INTR_FIFO)
				vgpu_fifo_nonstall_isr(g,
					&msg->info.fifo_nonstall_intr);
			else if (msg->unit == TEGRA_VGPU_NONSTALL_INTR_CE2)
				vgpu_ce2_nonstall_isr(g,
					&msg->info.ce2_nonstall_intr);
			break;
		case TEGRA_VGPU_EVENT_FECS_TRACE:
			vgpu_fecs_trace_data_update(g);
			break;
		case TEGRA_VGPU_EVENT_CHANNEL:
			vgpu_handle_channel_event(g, &msg->info.channel_event);
			break;
		case TEGRA_VGPU_EVENT_SM_ESR:
			vgpu_gr_handle_sm_esr_event(g, &msg->info.sm_esr);
			break;
		default:
			nvgpu_err(g, "unknown event %u", msg->event);
			break;
		}

		tegra_gr_comm_release(handle);
	}

	/* park here until nvgpu_thread_stop() is invoked by the teardown
	 * path, so the thread state stays valid until then */
	while (!nvgpu_thread_should_stop(&priv->intr_handler))
		msleep(10);
	return 0;
}
212 | |||
/*
 * Teardown hook installed as g->remove_support. Runs the per-unit
 * remove_support callbacks, wakes and stops the interrupt thread by
 * posting an ABORT event to our own interrupt queue, then unmaps bar1.
 */
static void vgpu_remove_support(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct vgpu_priv_data *priv =
		vgpu_get_priv_data_from_dev(dev_from_gk20a(g));
	struct tegra_vgpu_intr_msg msg;
	int err;

	if (g->dbg_regops_tmp_buf)
		nvgpu_kfree(g, g->dbg_regops_tmp_buf);

	if (g->pmu.remove_support)
		g->pmu.remove_support(&g->pmu);

	if (g->gr.remove_support)
		g->gr.remove_support(&g->gr);

	if (g->fifo.remove_support)
		g->fifo.remove_support(&g->fifo);

	if (g->mm.remove_support)
		g->mm.remove_support(&g->mm);

	/* unblock vgpu_intr_thread()'s recv by sending ourselves an ABORT
	 * event, then stop the thread */
	msg.event = TEGRA_VGPU_EVENT_ABORT;
	err = tegra_gr_comm_send(TEGRA_GR_COMM_CTX_CLIENT,
				TEGRA_GR_COMM_ID_SELF, TEGRA_VGPU_QUEUE_INTR,
				&msg, sizeof(msg));
	WARN_ON(err);
	nvgpu_thread_stop(&priv->intr_handler);

	/* free mappings to registers, etc*/

	if (l->bar1) {
		iounmap(l->bar1);
		l->bar1 = NULL;
	}
}
250 | |||
/*
 * Copy platform-data configuration into the gk20a structure and set up
 * the locks/lists that do not require talking to the vgpu server.
 */
static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	nvgpu_mutex_init(&g->poweron_lock);
	nvgpu_mutex_init(&g->poweroff_lock);
	/* snapshot the mapped register/bar1 pointers — presumably so a
	 * later power cycle can restore them; confirm against the native
	 * driver's use of regs_saved/bar1_saved */
	l->regs_saved = l->regs;
	l->bar1_saved = l->bar1;

	nvgpu_init_list_node(&g->pending_sema_waits);
	nvgpu_raw_spinlock_init(&g->pending_sema_waits_lock);

	g->aggressive_sync_destroy = platform->aggressive_sync_destroy;
	g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh;
	g->has_syncpoints = platform->has_syncpoints;
	g->ptimer_src_freq = platform->ptimer_src_freq;
	g->can_railgate = platform->can_railgate_init;
	g->railgate_delay = platform->railgate_delay_init;

	__nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
			platform->unify_address_spaces);
}
273 | |||
/*
 * One-time init: map the bar1 resource (only when the resource is named
 * "/vgpu"), initialize debugger-related state, and install
 * vgpu_remove_support() as the teardown hook.
 */
static int vgpu_init_support(struct platform_device *pdev)
{
	struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	struct gk20a *g = get_gk20a(&pdev->dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	void __iomem *regs;
	int err = 0;

	if (!r) {
		nvgpu_err(g, "failed to get gk20a bar1");
		err = -ENXIO;
		goto fail;
	}

	if (r->name && !strcmp(r->name, "/vgpu")) {
		regs = devm_ioremap_resource(&pdev->dev, r);
		if (IS_ERR(regs)) {
			nvgpu_err(g, "failed to remap gk20a bar1");
			err = PTR_ERR(regs);
			goto fail;
		}
		l->bar1 = regs;
		l->bar1_mem = r;
	}

	nvgpu_mutex_init(&g->dbg_sessions_lock);
	nvgpu_mutex_init(&g->client_lock);

	nvgpu_init_list_node(&g->profiler_objects);

	g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
	if (!g->dbg_regops_tmp_buf) {
		nvgpu_err(g, "couldn't allocate regops tmp buf");
		/* NOTE(review): unlike the errors above, this path returns
		 * directly instead of going through "fail" — confirm
		 * whether the state initialized above should be unwound */
		return -ENOMEM;
	}
	g->dbg_regops_tmp_buf_ops =
		SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);

	g->remove_support = vgpu_remove_support;
	return 0;

fail:
	vgpu_remove_support(g);
	return err;
}
319 | |||
320 | int vgpu_pm_prepare_poweroff(struct device *dev) | ||
321 | { | ||
322 | struct gk20a *g = get_gk20a(dev); | ||
323 | int ret = 0; | ||
324 | |||
325 | gk20a_dbg_fn(""); | ||
326 | |||
327 | if (!g->power_on) | ||
328 | return 0; | ||
329 | |||
330 | ret = gk20a_channel_suspend(g); | ||
331 | if (ret) | ||
332 | return ret; | ||
333 | |||
334 | g->power_on = false; | ||
335 | |||
336 | return ret; | ||
337 | } | ||
338 | |||
339 | static void vgpu_detect_chip(struct gk20a *g) | ||
340 | { | ||
341 | struct nvgpu_gpu_params *p = &g->params; | ||
342 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
343 | |||
344 | p->gpu_arch = priv->constants.arch; | ||
345 | p->gpu_impl = priv->constants.impl; | ||
346 | p->gpu_rev = priv->constants.rev; | ||
347 | |||
348 | gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n", | ||
349 | p->gpu_arch, | ||
350 | p->gpu_impl, | ||
351 | p->gpu_rev); | ||
352 | } | ||
353 | |||
/*
 * Common gpu-characteristics init plus the vgpu-specific deltas: batch
 * buffer mapping and runlist rescheduling are disabled for the virtual
 * GPU.
 */
int vgpu_init_gpu_characteristics(struct gk20a *g)
{
	int err;

	gk20a_dbg_fn("");

	err = gk20a_init_gpu_characteristics(g);
	if (err)
		return err;

	__nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, false);

	/* features vgpu does not support */
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_RESCHEDULE_RUNLIST, false);

	return 0;
}
371 | |||
/*
 * Read the GPU PTIMER via the vgpu server. On success *value receives the
 * timestamp; on failure *value is left untouched and the error is logged.
 */
int vgpu_read_ptimer(struct gk20a *g, u64 *value)
{
	struct tegra_vgpu_cmd_msg msg = {0};
	struct tegra_vgpu_read_ptimer_params *p = &msg.params.read_ptimer;
	int err;

	gk20a_dbg_fn("");

	msg.cmd = TEGRA_VGPU_CMD_READ_PTIMER;
	msg.handle = vgpu_get_handle(g);

	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	err = err ? err : msg.ret;
	if (!err)
		*value = p->time;
	else
		nvgpu_err(g, "vgpu read ptimer failed, err=%d", err);

	return err;
}
392 | |||
/*
 * Fetch up to TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT correlated
 * CPU/GPU timestamp pairs from the server into @samples.
 *
 * NOTE(review): the @source_id parameter is ignored — the request always
 * uses ..._SRC_ID_TSC. Confirm this is intentional (TSC-only support).
 */
int vgpu_get_timestamps_zipper(struct gk20a *g,
		u32 source_id, u32 count,
		struct nvgpu_cpu_time_correlation_sample *samples)
{
	struct tegra_vgpu_cmd_msg msg = {0};
	struct tegra_vgpu_get_timestamps_zipper_params *p =
		&msg.params.get_timestamps_zipper;
	int err;
	u32 i;

	gk20a_dbg_fn("");

	/* bound the request so the copy below cannot overrun p->samples */
	if (count > TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT) {
		nvgpu_err(g, "count %u overflow", count);
		return -EINVAL;
	}

	msg.cmd = TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER;
	msg.handle = vgpu_get_handle(g);
	p->source_id = TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC;
	p->count = count;

	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	err = err ? err : msg.ret;
	if (err) {
		nvgpu_err(g, "vgpu get timestamps zipper failed, err=%d", err);
		return err;
	}

	for (i = 0; i < count; i++) {
		samples[i].cpu_timestamp = p->samples[i].cpu_timestamp;
		samples[i].gpu_timestamp = p->samples[i].gpu_timestamp;
	}

	return err;
}
429 | |||
/*
 * Select and install the HAL matching the chip id reported by the server
 * (arch + impl, as filled in by vgpu_detect_chip()). Returns -ENODEV for
 * unrecognized chips.
 */
static int vgpu_init_hal(struct gk20a *g)
{
	u32 ver = g->params.gpu_arch + g->params.gpu_impl;
	int err;

	switch (ver) {
	case GK20A_GPUID_GM20B:
	case GK20A_GPUID_GM20B_B:
		gk20a_dbg_info("gm20b detected");
		err = vgpu_gm20b_init_hal(g);
		break;
	case NVGPU_GPUID_GP10B:
		gk20a_dbg_info("gp10b detected");
		err = vgpu_gp10b_init_hal(g);
		break;
#ifdef CONFIG_TEGRA_19x_GPU
	case TEGRA_19x_GPUID:
		err = vgpu_t19x_init_hal(g);
		break;
#endif
	default:
		nvgpu_err(g, "no support for %x", ver);
		err = -ENODEV;
		break;
	}

	return err;
}
458 | |||
/*
 * Runtime-PM poweron for the virtual GPU: detect the chip from the cached
 * server constants, install the matching HAL, bring up the mm/fifo/gr
 * units and resume channels. No-op when already powered on.
 *
 * NOTE(review): g->power_on stays true even when a later init step fails;
 * confirm callers handle that partial state.
 */
int vgpu_pm_finalize_poweron(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	int err;

	gk20a_dbg_fn("");

	if (g->power_on)
		return 0;

	g->power_on = true;

	vgpu_detect_chip(g);
	err = vgpu_init_hal(g);
	if (err)
		goto done;

	if (g->ops.ltc.init_fs_state)
		g->ops.ltc.init_fs_state(g);

	err = vgpu_init_mm_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a mm");
		goto done;
	}

	err = vgpu_init_fifo_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a fifo");
		goto done;
	}

	err = vgpu_init_gr_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a gr");
		goto done;
	}

	err = g->ops.chip_init_gpu_characteristics(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a gpu characteristics");
		goto done;
	}

	gk20a_ctxsw_trace_init(g);
	gk20a_sched_ctrl_init(g);
	gk20a_channel_resume(g);

done:
	return err;
}
510 | |||
/*
 * PM QoS notifier: when the GPU max-frequency bound changes, forward the
 * new cap to the vgpu server. Always returns NOTIFY_OK so notifiers
 * further down the chain still run.
 */
static int vgpu_qos_notify(struct notifier_block *nb,
			unsigned long n, void *data)
{
	struct gk20a_scale_profile *profile =
			container_of(nb, struct gk20a_scale_profile,
					qos_notify_block);
	struct gk20a *g = get_gk20a(profile->dev);
	u32 max_freq;
	int err;

	gk20a_dbg_fn("");

	max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS);
	err = vgpu_clk_cap_rate(profile->dev, max_freq);
	if (err)
		nvgpu_err(g, "%s failed, err=%d", __func__, err);

	return NOTIFY_OK; /* need notify call further */
}
530 | |||
/*
 * Register vgpu_qos_notify() on the GPU max-frequency QoS bound. With
 * CONFIG_GK20A_DEVFREQ the scale profile must already exist (presumably
 * allocated by gk20a_scale_init() — confirm); otherwise it is allocated
 * here and owned by this module (freed in vgpu_pm_qos_remove()).
 */
static int vgpu_pm_qos_init(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	struct gk20a_scale_profile *profile = g->scale_profile;

	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) {
		if (!profile)
			return -EINVAL;
	} else {
		profile = nvgpu_kzalloc(g, sizeof(*profile));
		if (!profile)
			return -ENOMEM;
		g->scale_profile = profile;
	}

	profile->dev = dev;
	profile->qos_notify_block.notifier_call = vgpu_qos_notify;
	pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
				&profile->qos_notify_block);
	return 0;
}
552 | |||
553 | static void vgpu_pm_qos_remove(struct device *dev) | ||
554 | { | ||
555 | struct gk20a *g = get_gk20a(dev); | ||
556 | |||
557 | pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
558 | &g->scale_profile->qos_notify_block); | ||
559 | nvgpu_kfree(g, g->scale_profile); | ||
560 | g->scale_profile = NULL; | ||
561 | } | ||
562 | |||
/*
 * PM setup for the virtual GPU: runtime PM is disabled (the server owns
 * physical power), devfreq scaling is configured when built in, and the
 * QoS notifier is installed. Skipped entirely on simulation platforms.
 */
static int vgpu_pm_init(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	unsigned long *freqs;
	int num_freqs;
	int err = 0;

	gk20a_dbg_fn("");

	if (nvgpu_platform_is_simulation(g))
		return 0;

	__pm_runtime_disable(dev, false);

	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
		gk20a_scale_init(dev);

	if (l->devfreq) {
		/* set min/max frequency based on frequency table */
		err = vgpu_clk_get_freqs(dev, &freqs, &num_freqs);
		if (err)
			return err;

		if (num_freqs < 1)
			return -EINVAL;

		/* assumes the table is sorted ascending — TODO confirm */
		l->devfreq->min_freq = freqs[0];
		l->devfreq->max_freq = freqs[num_freqs - 1];
	}

	err = vgpu_pm_qos_init(dev);
	if (err)
		return err;

	return err;
}
600 | |||
/*
 * Fetch the per-device constants blob from the vgpu server and cache it
 * in priv->constants. The gpc/tpc counts from the (external) reply are
 * validated against the tegra_vgpu limits before the blob is cached.
 */
static int vgpu_get_constants(struct gk20a *g)
{
	struct tegra_vgpu_cmd_msg msg = {};
	struct tegra_vgpu_constants_params *p = &msg.params.constants;
	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
	int err;

	gk20a_dbg_fn("");

	msg.cmd = TEGRA_VGPU_CMD_GET_CONSTANTS;
	msg.handle = vgpu_get_handle(g);
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	err = err ? err : msg.ret;

	if (unlikely(err)) {
		nvgpu_err(g, "%s failed, err=%d", __func__, err);
		return err;
	}

	if (unlikely(p->gpc_count > TEGRA_VGPU_MAX_GPC_COUNT ||
		p->max_tpc_per_gpc_count > TEGRA_VGPU_MAX_TPC_COUNT_PER_GPC)) {
		nvgpu_err(g, "gpc_count %d max_tpc_per_gpc %d overflow",
			(int)p->gpc_count, (int)p->max_tpc_per_gpc_count);
		return -EINVAL;
	}

	priv->constants = *p;
	return 0;
}
630 | |||
631 | int vgpu_probe(struct platform_device *pdev) | ||
632 | { | ||
633 | struct nvgpu_os_linux *l; | ||
634 | struct gk20a *gk20a; | ||
635 | int err; | ||
636 | struct device *dev = &pdev->dev; | ||
637 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
638 | struct vgpu_priv_data *priv; | ||
639 | |||
640 | if (!platform) { | ||
641 | dev_err(dev, "no platform data\n"); | ||
642 | return -ENODATA; | ||
643 | } | ||
644 | |||
645 | gk20a_dbg_fn(""); | ||
646 | |||
647 | l = kzalloc(sizeof(*l), GFP_KERNEL); | ||
648 | if (!l) { | ||
649 | dev_err(dev, "couldn't allocate gk20a support"); | ||
650 | return -ENOMEM; | ||
651 | } | ||
652 | gk20a = &l->g; | ||
653 | nvgpu_init_gk20a(gk20a); | ||
654 | |||
655 | nvgpu_kmem_init(gk20a); | ||
656 | |||
657 | err = nvgpu_init_enabled_flags(gk20a); | ||
658 | if (err) { | ||
659 | kfree(gk20a); | ||
660 | return err; | ||
661 | } | ||
662 | |||
663 | l->dev = dev; | ||
664 | if (tegra_platform_is_vdk()) | ||
665 | __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); | ||
666 | |||
667 | gk20a->is_virtual = true; | ||
668 | |||
669 | priv = nvgpu_kzalloc(gk20a, sizeof(*priv)); | ||
670 | if (!priv) { | ||
671 | kfree(gk20a); | ||
672 | return -ENOMEM; | ||
673 | } | ||
674 | |||
675 | platform->g = gk20a; | ||
676 | platform->vgpu_priv = priv; | ||
677 | |||
678 | err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class); | ||
679 | if (err) | ||
680 | return err; | ||
681 | |||
682 | vgpu_init_support(pdev); | ||
683 | |||
684 | vgpu_init_vars(gk20a, platform); | ||
685 | |||
686 | init_rwsem(&l->busy_lock); | ||
687 | |||
688 | nvgpu_spinlock_init(&gk20a->mc_enable_lock); | ||
689 | |||
690 | gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms; | ||
691 | |||
692 | /* Initialize the platform interface. */ | ||
693 | err = platform->probe(dev); | ||
694 | if (err) { | ||
695 | if (err == -EPROBE_DEFER) | ||
696 | dev_info(dev, "platform probe failed"); | ||
697 | else | ||
698 | dev_err(dev, "platform probe failed"); | ||
699 | return err; | ||
700 | } | ||
701 | |||
702 | if (platform->late_probe) { | ||
703 | err = platform->late_probe(dev); | ||
704 | if (err) { | ||
705 | dev_err(dev, "late probe failed"); | ||
706 | return err; | ||
707 | } | ||
708 | } | ||
709 | |||
710 | err = vgpu_comm_init(pdev); | ||
711 | if (err) { | ||
712 | dev_err(dev, "failed to init comm interface\n"); | ||
713 | return -ENOSYS; | ||
714 | } | ||
715 | |||
716 | priv->virt_handle = vgpu_connect(); | ||
717 | if (!priv->virt_handle) { | ||
718 | dev_err(dev, "failed to connect to server node\n"); | ||
719 | vgpu_comm_deinit(); | ||
720 | return -ENOSYS; | ||
721 | } | ||
722 | |||
723 | err = vgpu_get_constants(gk20a); | ||
724 | if (err) { | ||
725 | vgpu_comm_deinit(); | ||
726 | return err; | ||
727 | } | ||
728 | |||
729 | err = vgpu_pm_init(dev); | ||
730 | if (err) { | ||
731 | dev_err(dev, "pm init failed"); | ||
732 | return err; | ||
733 | } | ||
734 | |||
735 | err = nvgpu_thread_create(&priv->intr_handler, gk20a, | ||
736 | vgpu_intr_thread, "gk20a"); | ||
737 | if (err) | ||
738 | return err; | ||
739 | |||
740 | gk20a_debug_init(gk20a, "gpu.0"); | ||
741 | |||
742 | /* Set DMA parameters to allow larger sgt lists */ | ||
743 | dev->dma_parms = &l->dma_parms; | ||
744 | dma_set_max_seg_size(dev, UINT_MAX); | ||
745 | |||
746 | gk20a->gr_idle_timeout_default = | ||
747 | CONFIG_GK20A_DEFAULT_TIMEOUT; | ||
748 | gk20a->timeouts_enabled = true; | ||
749 | |||
750 | vgpu_create_sysfs(dev); | ||
751 | gk20a_init_gr(gk20a); | ||
752 | |||
753 | nvgpu_ref_init(&gk20a->refcount); | ||
754 | |||
755 | return 0; | ||
756 | } | ||
757 | |||
758 | int vgpu_remove(struct platform_device *pdev) | ||
759 | { | ||
760 | struct device *dev = &pdev->dev; | ||
761 | struct gk20a *g = get_gk20a(dev); | ||
762 | gk20a_dbg_fn(""); | ||
763 | |||
764 | vgpu_pm_qos_remove(dev); | ||
765 | if (g->remove_support) | ||
766 | g->remove_support(g); | ||
767 | |||
768 | vgpu_comm_deinit(); | ||
769 | gk20a_sched_ctrl_cleanup(g); | ||
770 | gk20a_user_deinit(dev, &nvgpu_class); | ||
771 | vgpu_remove_sysfs(dev); | ||
772 | gk20a_get_platform(dev)->g = NULL; | ||
773 | gk20a_put(g); | ||
774 | |||
775 | return 0; | ||
776 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h new file mode 100644 index 00000000..ac65dba3 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h | |||
@@ -0,0 +1,188 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Interfaces | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef _VIRT_H_ | ||
20 | #define _VIRT_H_ | ||
21 | |||
22 | struct device; | ||
23 | struct tegra_vgpu_gr_intr_info; | ||
24 | struct tegra_vgpu_fifo_intr_info; | ||
25 | struct tegra_vgpu_cmd_msg; | ||
26 | struct gk20a_platform; | ||
27 | |||
28 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
29 | #include <linux/tegra_gr_comm.h> | ||
30 | #include <linux/tegra_vgpu.h> | ||
31 | #include "gk20a/gk20a.h" | ||
32 | #include "common/linux/platform_gk20a.h" | ||
33 | #include "common/linux/os_linux.h" | ||
34 | |||
35 | #include <nvgpu/thread.h> | ||
36 | |||
/*
 * Per-device private state for a virtualized GPU instance, stored in
 * gk20a_platform::vgpu_priv.
 */
struct vgpu_priv_data {
	/* Connection handle obtained from vgpu_connect() at probe time. */
	u64 virt_handle;
	/* Interrupt-forwarding thread (runs vgpu_intr_thread). */
	struct nvgpu_thread intr_handler;
	/* Constants fetched from the server via vgpu_get_constants(). */
	struct tegra_vgpu_constants_params constants;
};
42 | |||
43 | static inline | ||
44 | struct vgpu_priv_data *vgpu_get_priv_data_from_dev(struct device *dev) | ||
45 | { | ||
46 | struct gk20a_platform *plat = gk20a_get_platform(dev); | ||
47 | |||
48 | return (struct vgpu_priv_data *)plat->vgpu_priv; | ||
49 | } | ||
50 | |||
/* Convenience wrapper: vgpu private data looked up via the gk20a device. */
static inline struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g)
{
	return vgpu_get_priv_data_from_dev(dev_from_gk20a(g));
}
55 | |||
56 | static inline u64 vgpu_get_handle_from_dev(struct device *dev) | ||
57 | { | ||
58 | struct vgpu_priv_data *priv = vgpu_get_priv_data_from_dev(dev); | ||
59 | |||
60 | if (unlikely(!priv)) { | ||
61 | dev_err(dev, "invalid vgpu_priv_data in %s\n", __func__); | ||
62 | return INT_MAX; | ||
63 | } | ||
64 | |||
65 | return priv->virt_handle; | ||
66 | } | ||
67 | |||
/* Convenience wrapper: server connection handle via the gk20a device. */
static inline u64 vgpu_get_handle(struct gk20a *g)
{
	return vgpu_get_handle_from_dev(dev_from_gk20a(g));
}
72 | |||
/* Power management and platform-device entry points. */
int vgpu_pm_prepare_poweroff(struct device *dev);
int vgpu_pm_finalize_poweron(struct device *dev);
int vgpu_probe(struct platform_device *dev);
int vgpu_remove(struct platform_device *dev);
/* Memory-management helpers. */
u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size);
/* GR (graphics engine) interrupt handling and context management. */
int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info);
int vgpu_gr_nonstall_isr(struct gk20a *g,
			struct tegra_vgpu_gr_nonstall_intr_info *info);
int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
			struct gr_ctx_desc **__gr_ctx,
			struct vm_gk20a *vm,
			u32 class,
			u32 flags);
void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
			struct gr_ctx_desc *gr_ctx);
void vgpu_gr_handle_sm_esr_event(struct gk20a *g,
			struct tegra_vgpu_sm_esr_info *info);
int vgpu_gr_init_ctx_state(struct gk20a *g);
/* FIFO and CE interrupt handling. */
int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info);
int vgpu_fifo_nonstall_isr(struct gk20a *g,
			struct tegra_vgpu_fifo_nonstall_intr_info *info);
int vgpu_ce2_nonstall_isr(struct gk20a *g,
			struct tegra_vgpu_ce2_nonstall_intr_info *info);
u32 vgpu_ce_get_num_pce(struct gk20a *g);
/* Per-subsystem init, called during poweron. */
int vgpu_init_mm_support(struct gk20a *g);
int vgpu_init_gr_support(struct gk20a *g);
int vgpu_init_fifo_support(struct gk20a *g);

/* RPC transport to the virtualization server. */
int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value);
int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in,
	size_t size_out);

/* Per-chip HAL initialization for supported virtualized GPUs. */
int vgpu_gm20b_init_hal(struct gk20a *g);
int vgpu_gp10b_init_hal(struct gk20a *g);

int vgpu_init_gpu_characteristics(struct gk20a *g);

/* sysfs and time services. */
void vgpu_create_sysfs(struct device *dev);
void vgpu_remove_sysfs(struct device *dev);
int vgpu_read_ptimer(struct gk20a *g, u64 *value);
int vgpu_get_timestamps_zipper(struct gk20a *g,
		u32 source_id, u32 count,
		struct nvgpu_cpu_time_correlation_sample *samples);
116 | #else | ||
/*
 * Stubs used when CONFIG_TEGRA_GR_VIRTUALIZATION is disabled.  Setup and
 * probe paths fail with -ENOSYS; ISR stubs return 0 so interrupt
 * dispatch degrades to a no-op.
 *
 * NOTE(review): only a subset of the API above has stubs here; the
 * remaining functions must not be referenced when virtualization is
 * compiled out, or linking will fail — confirm against callers.
 */
static inline int vgpu_pm_prepare_poweroff(struct device *dev)
{
	return -ENOSYS;
}
static inline int vgpu_pm_finalize_poweron(struct device *dev)
{
	return -ENOSYS;
}
static inline int vgpu_probe(struct platform_device *dev)
{
	return -ENOSYS;
}
static inline int vgpu_remove(struct platform_device *dev)
{
	return -ENOSYS;
}
static inline u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt,
				u64 size)
{
	return 0;
}
static inline int vgpu_gr_isr(struct gk20a *g,
			struct tegra_vgpu_gr_intr_info *info)
{
	return 0;
}
static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
				struct gr_ctx_desc **__gr_ctx,
				struct vm_gk20a *vm,
				u32 class,
				u32 flags)
{
	return -ENOSYS;
}
static inline void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
				struct gr_ctx_desc *gr_ctx)
{
}
static inline int vgpu_gr_init_ctx_state(struct gk20a *g)
{
	return -ENOSYS;
}
static inline int vgpu_fifo_isr(struct gk20a *g,
			struct tegra_vgpu_fifo_intr_info *info)
{
	return 0;
}
static inline int vgpu_init_mm_support(struct gk20a *g)
{
	return -ENOSYS;
}
static inline int vgpu_init_gr_support(struct gk20a *g)
{
	return -ENOSYS;
}
static inline int vgpu_init_fifo_support(struct gk20a *g)
{
	return -ENOSYS;
}

static inline int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value)
{
	return -ENOSYS;
}
static inline int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in,
				size_t size_out)
{
	return -ENOSYS;
}
186 | #endif | ||
187 | |||
188 | #endif | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_t19x.h b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_t19x.h new file mode 100644 index 00000000..faa5f772 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_t19x.h | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
#ifndef _VGPU_T19X_H_
#define _VGPU_T19X_H_

struct gk20a;

/* HAL init for the virtualized gv11b (the only t19x vgpu chip here). */
int vgpu_gv11b_init_hal(struct gk20a *g);

/* Generic t19x entry points alias directly to the gv11b implementations. */
#define vgpu_t19x_init_hal(g) vgpu_gv11b_init_hal(g)

/* Device-tree compatible string matched by the vgpu platform driver. */
#define TEGRA_19x_VGPU_COMPAT_TEGRA "nvidia,gv11b-vgpu"
extern struct gk20a_platform gv11b_vgpu_tegra_platform;
#define t19x_vgpu_tegra_platform gv11b_vgpu_tegra_platform

#endif