path: root/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
author      Aingara Paramakuru <aparamakuru@nvidia.com>  2014-05-05 21:14:22 -0400
committer   Dan Willemsen <dwillemsen@nvidia.com>        2015-03-18 15:11:01 -0400
commit      1fd722f592c2e0523c5e399a2406a4e387057188 (patch)
tree        3425fb1a08ec2ccc6397e39c73a5579117e00a05 /drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
parent      69e0cd3dfd8f39bc8d3529325001dcacd774f669 (diff)
gpu: nvgpu: support gk20a virtualization
The nvgpu driver now supports using the Tegra graphics virtualization
interfaces to support gk20a in a virtualized environment.

Bug 1509608

Change-Id: I6ede15ee7bf0b0ad8a13e8eb5f557c3516ead676
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/440122
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
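Every fifo operation added by this patch follows the same request/response pattern: fill a tegra_vgpu_cmd_msg, address it with the platform's virt_handle, point the per-command params at the server-side channel context, and round-trip it with vgpu_comm_sendrecv(). The sketch below restates vgpu_channel_bind() from the diff with explanatory comments; the message and command definitions come from vgpu/vgpu.h and the tegra_vgpu headers, which are outside this change.

    /* Sketch of the RPC pattern used throughout this file (mirrors
     * vgpu_channel_bind() below; message/command types are assumed to
     * come from the vgpu headers, not from this diff). */
    static void vgpu_channel_bind_sketch(struct channel_gk20a *ch)
    {
    	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
    	struct tegra_vgpu_cmd_msg msg;		/* request and reply share this buffer */
    	struct tegra_vgpu_channel_config_params *p =
    			&msg.params.channel_config;
    	int err;

    	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND;	/* which server-side operation to run */
    	msg.handle = platform->virt_handle;	/* identifies this guest to the server */
    	p->handle = ch->virt_ctx;		/* server-side handle for this channel */
    	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
    	WARN_ON(err || msg.ret);		/* err: transport failure, msg.ret: server failure */

    	ch->bound = true;
    }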
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu/fifo_vgpu.c')
-rw-r--r--  drivers/gpu/nvgpu/vgpu/fifo_vgpu.c  569
1 file changed, 569 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
new file mode 100644
index 00000000..23dec1f3
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -0,0 +1,569 @@
1/*
2 * Virtualized GPU Fifo
3 *
4 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/dma-mapping.h>
17#include "vgpu/vgpu.h"
18#include "gk20a/hw_fifo_gk20a.h"
19#include "gk20a/hw_ram_gk20a.h"
20
21static void vgpu_channel_bind(struct channel_gk20a *ch)
22{
23 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
24 struct tegra_vgpu_cmd_msg msg;
25 struct tegra_vgpu_channel_config_params *p =
26 &msg.params.channel_config;
27 int err;
28
29 gk20a_dbg_info("bind channel %d", ch->hw_chid);
30
31 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND;
32 msg.handle = platform->virt_handle;
33 p->handle = ch->virt_ctx;
34 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
35 WARN_ON(err || msg.ret);
36
37 ch->bound = true;
38}
39
40static void vgpu_channel_unbind(struct channel_gk20a *ch)
41{
42 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
43
44 gk20a_dbg_fn("");
45
46 if (ch->bound) {
47 struct tegra_vgpu_cmd_msg msg;
48 struct tegra_vgpu_channel_config_params *p =
49 &msg.params.channel_config;
50 int err;
51
52 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNBIND;
53 msg.handle = platform->virt_handle;
54 p->handle = ch->virt_ctx;
55 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
56 WARN_ON(err || msg.ret);
57 }
58
59 ch->bound = false;
60
61 /*
62	 * if we are aggressive then we can destroy the syncpt
63 * resource at this point
64 * if not, then it will be destroyed at channel_free()
65 */
66 if (ch->sync && ch->sync->aggressive_destroy) {
67 ch->sync->destroy(ch->sync);
68 ch->sync = NULL;
69 }
70}
71
72static int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
73{
74 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
75 struct tegra_vgpu_cmd_msg msg;
76 struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
77 int err;
78
79 gk20a_dbg_fn("");
80
81 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_HWCTX;
82 msg.handle = platform->virt_handle;
83 p->id = ch->hw_chid;
84 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
85 if (err || msg.ret) {
86 gk20a_err(dev_from_gk20a(g), "fail");
87 return -ENOMEM;
88 }
89
90 ch->virt_ctx = p->handle;
91 gk20a_dbg_fn("done");
92 return 0;
93}
94
95static void vgpu_channel_free_inst(struct gk20a *g, struct channel_gk20a *ch)
96{
97 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
98 struct tegra_vgpu_cmd_msg msg;
99 struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
100 int err;
101
102 gk20a_dbg_fn("");
103
104 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWCTX;
105 msg.handle = platform->virt_handle;
106 p->handle = ch->virt_ctx;
107 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
108 WARN_ON(err || msg.ret);
109}
110
111static void vgpu_channel_disable(struct channel_gk20a *ch)
112{
113 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
114 struct tegra_vgpu_cmd_msg msg;
115 struct tegra_vgpu_channel_config_params *p =
116 &msg.params.channel_config;
117 int err;
118
119 gk20a_dbg_fn("");
120
121 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_DISABLE;
122 msg.handle = platform->virt_handle;
123 p->handle = ch->virt_ctx;
124 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
125 WARN_ON(err || msg.ret);
126}
127
128static int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base,
129 u32 gpfifo_entries)
130{
131 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
132 struct device __maybe_unused *d = dev_from_gk20a(ch->g);
133 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
134 struct tegra_vgpu_cmd_msg msg;
135 struct tegra_vgpu_ramfc_params *p = &msg.params.ramfc;
136 int err;
137
138 gk20a_dbg_fn("");
139
140 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SETUP_RAMFC;
141 msg.handle = platform->virt_handle;
142 p->handle = ch->virt_ctx;
143 p->gpfifo_va = gpfifo_base;
144 p->num_entries = gpfifo_entries;
145 p->userd_addr = ch->userd_iova;
146 p->iova = mapping ? 1 : 0;
147 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
148
149 return (err || msg.ret) ? -ENOMEM : 0;
150}
151
152static int init_engine_info(struct fifo_gk20a *f)
153{
154 struct fifo_engine_info_gk20a *gr_info;
155 const u32 gr_sw_id = ENGINE_GR_GK20A;
156
157 gk20a_dbg_fn("");
158
159 /* all we really care about finding is the graphics entry */
160 /* especially early on in sim it probably thinks it has more */
161 f->num_engines = 1;
162
163 gr_info = f->engine_info + gr_sw_id;
164
165 gr_info->sw_id = gr_sw_id;
166 gr_info->name = "gr";
167 /* FIXME: retrieve this from server */
168 gr_info->runlist_id = 0;
169 return 0;
170}
171
172static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
173{
174 struct fifo_engine_info_gk20a *engine_info;
175 struct fifo_runlist_info_gk20a *runlist;
176 struct device *d = dev_from_gk20a(g);
177 u32 runlist_id;
178 u32 i;
179 u64 runlist_size;
180
181 gk20a_dbg_fn("");
182
183 f->max_runlists = fifo_eng_runlist_base__size_1_v();
184 f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) *
185 f->max_runlists, GFP_KERNEL);
186 if (!f->runlist_info)
187 goto clean_up;
188
189 engine_info = f->engine_info + ENGINE_GR_GK20A;
190 runlist_id = engine_info->runlist_id;
191 runlist = &f->runlist_info[runlist_id];
192
193 runlist->active_channels =
194 kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
195 GFP_KERNEL);
196 if (!runlist->active_channels)
197 goto clean_up_runlist_info;
198
199 runlist_size = sizeof(u16) * f->num_channels;
200 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
201 dma_addr_t iova;
202
203 runlist->mem[i].cpuva =
204 dma_alloc_coherent(d,
205 runlist_size,
206 &iova,
207 GFP_KERNEL);
208 if (!runlist->mem[i].cpuva) {
209 dev_err(d, "memory allocation failed\n");
210 goto clean_up_runlist;
211 }
212 runlist->mem[i].iova = iova;
213 runlist->mem[i].size = runlist_size;
214 }
215 mutex_init(&runlist->mutex);
216 init_waitqueue_head(&runlist->runlist_wq);
217
218	/* None of the buffers is pinned if this value doesn't change.
219 Otherwise, one of them (cur_buffer) must have been pinned. */
220 runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
221
222 gk20a_dbg_fn("done");
223 return 0;
224
225clean_up_runlist:
226 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
227 if (runlist->mem[i].cpuva)
228 dma_free_coherent(d,
229 runlist->mem[i].size,
230 runlist->mem[i].cpuva,
231 runlist->mem[i].iova);
232 runlist->mem[i].cpuva = NULL;
233 runlist->mem[i].iova = 0;
234 }
235
236 kfree(runlist->active_channels);
237 runlist->active_channels = NULL;
238
239clean_up_runlist_info:
240 kfree(f->runlist_info);
241 f->runlist_info = NULL;
242
243clean_up:
244 gk20a_dbg_fn("fail");
245 return -ENOMEM;
246}
247
248static int vgpu_init_fifo_setup_sw(struct gk20a *g)
249{
250 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
251 struct fifo_gk20a *f = &g->fifo;
252 struct device *d = dev_from_gk20a(g);
253 int chid, err = 0;
254 dma_addr_t iova;
255
256 gk20a_dbg_fn("");
257
258 if (f->sw_ready) {
259 gk20a_dbg_fn("skip init");
260 return 0;
261 }
262
263 f->g = g;
264
265 err = vgpu_get_attribute(platform->virt_handle,
266 TEGRA_VGPU_ATTRIB_NUM_CHANNELS,
267 &f->num_channels);
268 if (err)
269 return -ENXIO;
270
271 f->max_engines = ENGINE_INVAL_GK20A;
272
273 f->userd_entry_size = 1 << ram_userd_base_shift_v();
274 f->userd_total_size = f->userd_entry_size * f->num_channels;
275
276 f->userd.cpuva = dma_alloc_coherent(d,
277 f->userd_total_size,
278 &iova,
279 GFP_KERNEL);
280 if (!f->userd.cpuva) {
281 dev_err(d, "memory allocation failed\n");
282 goto clean_up;
283 }
284
285 f->userd.iova = iova;
286 err = gk20a_get_sgtable(d, &f->userd.sgt,
287 f->userd.cpuva, f->userd.iova,
288 f->userd_total_size);
289 if (err) {
290 dev_err(d, "failed to create sg table\n");
291 goto clean_up;
292 }
293
294 /* bar1 va */
295 f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd_total_size);
296 if (!f->userd.gpu_va) {
297 dev_err(d, "gmmu mapping failed\n");
298 goto clean_up;
299 }
300
301 gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);
302
303 f->userd.size = f->userd_total_size;
304
305 f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
306 GFP_KERNEL);
307 f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info),
308 GFP_KERNEL);
309
310 if (!(f->channel && f->engine_info)) {
311 err = -ENOMEM;
312 goto clean_up;
313 }
314
315 init_engine_info(f);
316
317 init_runlist(g, f);
318
319 for (chid = 0; chid < f->num_channels; chid++) {
320 f->channel[chid].userd_cpu_va =
321 f->userd.cpuva + chid * f->userd_entry_size;
322 f->channel[chid].userd_iova =
323 NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova)
324 + chid * f->userd_entry_size;
325 f->channel[chid].userd_gpu_va =
326 f->userd.gpu_va + chid * f->userd_entry_size;
327
328 gk20a_init_channel_support(g, chid);
329 }
330 mutex_init(&f->ch_inuse_mutex);
331
332 f->deferred_reset_pending = false;
333 mutex_init(&f->deferred_reset_mutex);
334
335 f->sw_ready = true;
336
337 gk20a_dbg_fn("done");
338 return 0;
339
340clean_up:
341 gk20a_dbg_fn("fail");
342 /* FIXME: unmap from bar1 */
343 if (f->userd.sgt)
344 gk20a_free_sgtable(&f->userd.sgt);
345 if (f->userd.cpuva)
346 dma_free_coherent(d,
347 f->userd_total_size,
348 f->userd.cpuva,
349 f->userd.iova);
350 f->userd.cpuva = NULL;
351 f->userd.iova = 0;
352
353 memset(&f->userd, 0, sizeof(struct userd_desc));
354
355 kfree(f->channel);
356 f->channel = NULL;
357 kfree(f->engine_info);
358 f->engine_info = NULL;
359
360 return err;
361}
362
363static int vgpu_init_fifo_setup_hw(struct gk20a *g)
364{
365 gk20a_dbg_fn("");
366
367 /* test write, read through bar1 @ userd region before
368 * turning on the snooping */
369 {
370 struct fifo_gk20a *f = &g->fifo;
371 u32 v, v1 = 0x33, v2 = 0x55;
372
373 u32 bar1_vaddr = f->userd.gpu_va;
374 volatile u32 *cpu_vaddr = f->userd.cpuva;
375
376 gk20a_dbg_info("test bar1 @ vaddr 0x%x",
377 bar1_vaddr);
378
379 v = gk20a_bar1_readl(g, bar1_vaddr);
380
381 *cpu_vaddr = v1;
382 smp_mb();
383
384 if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
385 gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
386 return -EINVAL;
387 }
388
389 gk20a_bar1_writel(g, bar1_vaddr, v2);
390
391 if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
392 gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
393 return -EINVAL;
394 }
395
396 /* is it visible to the cpu? */
397 if (*cpu_vaddr != v2) {
398 gk20a_err(dev_from_gk20a(g),
399 "cpu didn't see bar1 write @ %p!",
400 cpu_vaddr);
401 }
402
403 /* put it back */
404 gk20a_bar1_writel(g, bar1_vaddr, v);
405 }
406
407 gk20a_dbg_fn("done");
408
409 return 0;
410}
411
412int vgpu_init_fifo_support(struct gk20a *g)
413{
414	int err;
415
416 gk20a_dbg_fn("");
417
418 err = vgpu_init_fifo_setup_sw(g);
419 if (err)
420 return err;
421
422 err = vgpu_init_fifo_setup_hw(g);
423 return err;
424}
425
426static int vgpu_fifo_preempt_channel(struct gk20a *g, u32 hw_chid)
427{
428 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
429 struct fifo_gk20a *f = &g->fifo;
430 struct tegra_vgpu_cmd_msg msg;
431 struct tegra_vgpu_channel_config_params *p =
432 &msg.params.channel_config;
433 int err;
434
435 gk20a_dbg_fn("");
436
437 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT;
438 msg.handle = platform->virt_handle;
439 p->handle = f->channel[hw_chid].virt_ctx;
440 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
441
442 if (err || msg.ret) {
443 gk20a_err(dev_from_gk20a(g),
444 "preempt channel %d failed\n", hw_chid);
445 err = -ENOMEM;
446 }
447
448 return err;
449}
450
451static int vgpu_submit_runlist(u64 handle, u8 runlist_id, u16 *runlist,
452 u32 num_entries)
453{
454 struct tegra_vgpu_cmd_msg *msg;
455 struct tegra_vgpu_runlist_params *p;
456 size_t size = sizeof(*msg) + sizeof(*runlist) * num_entries;
457 char *ptr;
458 int err;
459
460 msg = kmalloc(size, GFP_KERNEL);
461 if (!msg)
462 return -1;
463
464 msg->cmd = TEGRA_VGPU_CMD_SUBMIT_RUNLIST;
465 msg->handle = handle;
466 p = &msg->params.runlist;
467 p->runlist_id = runlist_id;
468 p->num_entries = num_entries;
469
470 ptr = (char *)msg + sizeof(*msg);
471 memcpy(ptr, runlist, sizeof(*runlist) * num_entries);
472 err = vgpu_comm_sendrecv(msg, size, sizeof(*msg));
473
474 err = (err || msg->ret) ? -1 : 0;
475 kfree(msg);
476 return err;
477}
478
479static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
480 u32 hw_chid, bool add,
481 bool wait_for_finish)
482{
483 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
484 struct fifo_gk20a *f = &g->fifo;
485 struct fifo_runlist_info_gk20a *runlist;
486 u16 *runlist_entry = NULL;
487 u32 count = 0;
488
489 gk20a_dbg_fn("");
490
491 runlist = &f->runlist_info[runlist_id];
492
493 /* valid channel, add/remove it from active list.
494 Otherwise, keep active list untouched for suspend/resume. */
495 if (hw_chid != ~0) {
496 if (add) {
497 if (test_and_set_bit(hw_chid,
498 runlist->active_channels) == 1)
499 return 0;
500 } else {
501 if (test_and_clear_bit(hw_chid,
502 runlist->active_channels) == 0)
503 return 0;
504 }
505 }
506
507 if (hw_chid != ~0 || /* add/remove a valid channel */
508 add /* resume to add all channels back */) {
509 u32 chid;
510
511 runlist_entry = runlist->mem[0].cpuva;
512 for_each_set_bit(chid,
513 runlist->active_channels, f->num_channels) {
514 gk20a_dbg_info("add channel %d to runlist", chid);
515 runlist_entry[0] = chid;
516 runlist_entry++;
517 count++;
518 }
519 } else /* suspend to remove all channels */
520 count = 0;
521
522 return vgpu_submit_runlist(platform->virt_handle, runlist_id,
523 runlist->mem[0].cpuva, count);
524}
525
526/* add/remove a channel from runlist
527 special cases below: runlist->active_channels will NOT be changed.
528 (hw_chid == ~0 && !add) means remove all active channels from runlist.
529 (hw_chid == ~0 && add) means restore all active channels on runlist. */
530static int vgpu_fifo_update_runlist(struct gk20a *g, u32 runlist_id,
531 u32 hw_chid, bool add, bool wait_for_finish)
532{
533 struct fifo_runlist_info_gk20a *runlist = NULL;
534 struct fifo_gk20a *f = &g->fifo;
535 u32 ret = 0;
536
537 gk20a_dbg_fn("");
538
539 runlist = &f->runlist_info[runlist_id];
540
541 mutex_lock(&runlist->mutex);
542
543 ret = vgpu_fifo_update_runlist_locked(g, runlist_id, hw_chid, add,
544 wait_for_finish);
545
546 mutex_unlock(&runlist->mutex);
547 return ret;
548}
549
550static int vgpu_fifo_wait_engine_idle(struct gk20a *g)
551{
552 gk20a_dbg_fn("");
553
554 return 0;
555}
556
557void vgpu_init_fifo_ops(struct gpu_ops *gops)
558{
559 gops->fifo.bind_channel = vgpu_channel_bind;
560 gops->fifo.unbind_channel = vgpu_channel_unbind;
561 gops->fifo.disable_channel = vgpu_channel_disable;
562 gops->fifo.alloc_inst = vgpu_channel_alloc_inst;
563 gops->fifo.free_inst = vgpu_channel_free_inst;
564 gops->fifo.setup_ramfc = vgpu_channel_setup_ramfc;
565 gops->fifo.preempt_channel = vgpu_fifo_preempt_channel;
566 gops->fifo.update_runlist = vgpu_fifo_update_runlist;
567 gops->fifo.wait_engine_idle = vgpu_fifo_wait_engine_idle;
568}
569
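vgpu_init_fifo_ops() is the only entry point the rest of the driver needs from this file: it fills the fifo portion of struct gpu_ops with the RPC-backed implementations above, and the common gk20a fifo code then dispatches through those pointers instead of programming fifo registers directly. A hypothetical caller is sketched below; where the common code actually stores the table and obtains g and ch is an assumption, not shown in this diff.

    /* Hypothetical dispatch through the ops table populated above,
     * given a struct gk20a *g and a struct channel_gk20a *ch; the
     * surrounding setup is assumed. */
    struct gpu_ops ops = {};

    vgpu_init_fifo_ops(&ops);			/* install the vgpu fifo callbacks */

    ops.fifo.bind_channel(ch);			/* -> vgpu_channel_bind(): RPC to the server */
    ops.fifo.update_runlist(g, 0, ch->hw_chid, true, false);	/* add the channel to runlist 0 */
    ops.fifo.preempt_channel(g, ch->hw_chid);	/* -> vgpu_fifo_preempt_channel() */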