From 1fd722f592c2e0523c5e399a2406a4e387057188 Mon Sep 17 00:00:00 2001
From: Aingara Paramakuru
Date: Mon, 5 May 2014 21:14:22 -0400
Subject: gpu: nvgpu: support gk20a virtualization

The nvgpu driver now uses the Tegra graphics virtualization interfaces
to support gk20a in a virtualized environment.

Bug 1509608

Change-Id: I6ede15ee7bf0b0ad8a13e8eb5f557c3516ead676
Signed-off-by: Aingara Paramakuru
Reviewed-on: http://git-master/r/440122
Reviewed-by: Terje Bergstrom
Tested-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 569 +++++++++++++++++++++++++++++++++++++
 1 file changed, 569 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c

diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
new file mode 100644
index 00000000..23dec1f3
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -0,0 +1,569 @@
+/*
+ * Virtualized GPU Fifo
+ *
+ * Copyright (c) 2014 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/dma-mapping.h>
+#include "vgpu/vgpu.h"
+#include "gk20a/hw_fifo_gk20a.h"
+#include "gk20a/hw_ram_gk20a.h"
+
+static void vgpu_channel_bind(struct channel_gk20a *ch)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_channel_config_params *p =
+			&msg.params.channel_config;
+	int err;
+
+	gk20a_dbg_info("bind channel %d", ch->hw_chid);
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND;
+	msg.handle = platform->virt_handle;
+	p->handle = ch->virt_ctx;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	WARN_ON(err || msg.ret);
+
+	ch->bound = true;
+}
+
+static void vgpu_channel_unbind(struct channel_gk20a *ch)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
+
+	gk20a_dbg_fn("");
+
+	if (ch->bound) {
+		struct tegra_vgpu_cmd_msg msg;
+		struct tegra_vgpu_channel_config_params *p =
+				&msg.params.channel_config;
+		int err;
+
+		msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNBIND;
+		msg.handle = platform->virt_handle;
+		p->handle = ch->virt_ctx;
+		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+		WARN_ON(err || msg.ret);
+	}
+
+	ch->bound = false;
+
+	/*
+	 * if we are aggressive then we can destroy the syncpt resource
+	 * at this point; if not, it will be destroyed at channel_free()
+	 */
+	if (ch->sync && ch->sync->aggressive_destroy) {
+		ch->sync->destroy(ch->sync);
+		ch->sync = NULL;
+	}
+}
+
+static int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_HWCTX;
+	msg.handle = platform->virt_handle;
+	p->id = ch->hw_chid;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	if (err || msg.ret) {
+		gk20a_err(dev_from_gk20a(g), "fail");
+		return -ENOMEM;
+	}
+
+	ch->virt_ctx = p->handle;
+	gk20a_dbg_fn("done");
+	return 0;
+}
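+
+/*
+ * Editor's note: every vgpu fifo operation in this file follows the same
+ * RPC pattern: fill in a tegra_vgpu_cmd_msg with a command ID, the
+ * platform's virt handle and per-call params, send it to the server with
+ * vgpu_comm_sendrecv(), then check both the transport error code and the
+ * server's msg.ret status.
+ */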
+static void vgpu_channel_free_inst(struct gk20a *g, struct channel_gk20a *ch)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWCTX;
+	msg.handle = platform->virt_handle;
+	p->handle = ch->virt_ctx;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	WARN_ON(err || msg.ret);
+}
+
+static void vgpu_channel_disable(struct channel_gk20a *ch)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_channel_config_params *p =
+			&msg.params.channel_config;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_DISABLE;
+	msg.handle = platform->virt_handle;
+	p->handle = ch->virt_ctx;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	WARN_ON(err || msg.ret);
+}
+
+static int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base,
+				u32 gpfifo_entries)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
+	struct device __maybe_unused *d = dev_from_gk20a(ch->g);
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_ramfc_params *p = &msg.params.ramfc;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SETUP_RAMFC;
+	msg.handle = platform->virt_handle;
+	p->handle = ch->virt_ctx;
+	p->gpfifo_va = gpfifo_base;
+	p->num_entries = gpfifo_entries;
+	p->userd_addr = ch->userd_iova;
+	p->iova = mapping ? 1 : 0;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+	return (err || msg.ret) ? -ENOMEM : 0;
+}
+
+static int init_engine_info(struct fifo_gk20a *f)
+{
+	struct fifo_engine_info_gk20a *gr_info;
+	const u32 gr_sw_id = ENGINE_GR_GK20A;
+
+	gk20a_dbg_fn("");
+
+	/* all we really care about finding is the graphics entry */
+	/* especially early on in sim it probably thinks it has more */
+	f->num_engines = 1;
+
+	gr_info = f->engine_info + gr_sw_id;
+
+	gr_info->sw_id = gr_sw_id;
+	gr_info->name = "gr";
+	/* FIXME: retrieve this from server */
+	gr_info->runlist_id = 0;
+	return 0;
+}
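+
+/*
+ * Editor's note: each runlist is backed by MAX_RUNLIST_BUFFERS coherent
+ * DMA buffers of u16 channel IDs, sized for the full channel count;
+ * cur_buffer records which buffer, if any, is currently pinned.
+ */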
+static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
+{
+	struct fifo_engine_info_gk20a *engine_info;
+	struct fifo_runlist_info_gk20a *runlist;
+	struct device *d = dev_from_gk20a(g);
+	u32 runlist_id;
+	u32 i;
+	u64 runlist_size;
+
+	gk20a_dbg_fn("");
+
+	f->max_runlists = fifo_eng_runlist_base__size_1_v();
+	f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) *
+				  f->max_runlists, GFP_KERNEL);
+	if (!f->runlist_info)
+		goto clean_up;
+
+	engine_info = f->engine_info + ENGINE_GR_GK20A;
+	runlist_id = engine_info->runlist_id;
+	runlist = &f->runlist_info[runlist_id];
+
+	runlist->active_channels =
+		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
+			GFP_KERNEL);
+	if (!runlist->active_channels)
+		goto clean_up_runlist_info;
+
+	runlist_size = sizeof(u16) * f->num_channels;
+	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
+		dma_addr_t iova;
+
+		runlist->mem[i].cpuva =
+			dma_alloc_coherent(d,
+					runlist_size,
+					&iova,
+					GFP_KERNEL);
+		if (!runlist->mem[i].cpuva) {
+			dev_err(d, "memory allocation failed\n");
+			goto clean_up_runlist;
+		}
+		runlist->mem[i].iova = iova;
+		runlist->mem[i].size = runlist_size;
+	}
+	mutex_init(&runlist->mutex);
+	init_waitqueue_head(&runlist->runlist_wq);
+
+	/* None of the buffers is pinned if this value doesn't change.
+	   Otherwise, one of them (cur_buffer) must have been pinned. */
+	runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
+
+	gk20a_dbg_fn("done");
+	return 0;
+
+clean_up_runlist:
+	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
+		if (runlist->mem[i].cpuva)
+			dma_free_coherent(d,
+				runlist->mem[i].size,
+				runlist->mem[i].cpuva,
+				runlist->mem[i].iova);
+		runlist->mem[i].cpuva = NULL;
+		runlist->mem[i].iova = 0;
+	}
+
+	kfree(runlist->active_channels);
+	runlist->active_channels = NULL;
+
+clean_up_runlist_info:
+	kfree(f->runlist_info);
+	f->runlist_info = NULL;
+
+clean_up:
+	gk20a_dbg_fn("fail");
+	return -ENOMEM;
+}
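+
+/*
+ * Editor's note: SW setup queries the channel count from the server via
+ * vgpu_get_attribute(), allocates the userd region and maps it through
+ * BAR1, then sets up per-channel userd pointers and channel bookkeeping.
+ */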
+static int vgpu_init_fifo_setup_sw(struct gk20a *g)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct fifo_gk20a *f = &g->fifo;
+	struct device *d = dev_from_gk20a(g);
+	int chid, err = 0;
+	dma_addr_t iova;
+
+	gk20a_dbg_fn("");
+
+	if (f->sw_ready) {
+		gk20a_dbg_fn("skip init");
+		return 0;
+	}
+
+	f->g = g;
+
+	err = vgpu_get_attribute(platform->virt_handle,
+				TEGRA_VGPU_ATTRIB_NUM_CHANNELS,
+				&f->num_channels);
+	if (err)
+		return -ENXIO;
+
+	f->max_engines = ENGINE_INVAL_GK20A;
+
+	f->userd_entry_size = 1 << ram_userd_base_shift_v();
+	f->userd_total_size = f->userd_entry_size * f->num_channels;
+
+	f->userd.cpuva = dma_alloc_coherent(d,
+					f->userd_total_size,
+					&iova,
+					GFP_KERNEL);
+	if (!f->userd.cpuva) {
+		dev_err(d, "memory allocation failed\n");
+		goto clean_up;
+	}
+
+	f->userd.iova = iova;
+	err = gk20a_get_sgtable(d, &f->userd.sgt,
+				f->userd.cpuva, f->userd.iova,
+				f->userd_total_size);
+	if (err) {
+		dev_err(d, "failed to create sg table\n");
+		goto clean_up;
+	}
+
+	/* bar1 va */
+	f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd_total_size);
+	if (!f->userd.gpu_va) {
+		dev_err(d, "gmmu mapping failed\n");
+		goto clean_up;
+	}
+
+	gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);
+
+	f->userd.size = f->userd_total_size;
+
+	f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
+				GFP_KERNEL);
+	f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info),
+				GFP_KERNEL);
+
+	if (!(f->channel && f->engine_info)) {
+		err = -ENOMEM;
+		goto clean_up;
+	}
+
+	init_engine_info(f);
+
+	init_runlist(g, f);
+
+	for (chid = 0; chid < f->num_channels; chid++) {
+		f->channel[chid].userd_cpu_va =
+			f->userd.cpuva + chid * f->userd_entry_size;
+		f->channel[chid].userd_iova =
+			NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova)
+				+ chid * f->userd_entry_size;
+		f->channel[chid].userd_gpu_va =
+			f->userd.gpu_va + chid * f->userd_entry_size;
+
+		gk20a_init_channel_support(g, chid);
+	}
+	mutex_init(&f->ch_inuse_mutex);
+
+	f->deferred_reset_pending = false;
+	mutex_init(&f->deferred_reset_mutex);
+
+	f->sw_ready = true;
+
+	gk20a_dbg_fn("done");
+	return 0;
+
+clean_up:
+	gk20a_dbg_fn("fail");
+	/* FIXME: unmap from bar1 */
+	if (f->userd.sgt)
+		gk20a_free_sgtable(&f->userd.sgt);
+	if (f->userd.cpuva)
+		dma_free_coherent(d,
+				f->userd_total_size,
+				f->userd.cpuva,
+				f->userd.iova);
+	f->userd.cpuva = NULL;
+	f->userd.iova = 0;
+
+	memset(&f->userd, 0, sizeof(struct userd_desc));
+
+	kfree(f->channel);
+	f->channel = NULL;
+	kfree(f->engine_info);
+	f->engine_info = NULL;
+
+	return err;
+}
+
+static int vgpu_init_fifo_setup_hw(struct gk20a *g)
+{
+	gk20a_dbg_fn("");
+
+	/* test write, read through bar1 @ userd region before
+	 * turning on the snooping */
+	{
+		struct fifo_gk20a *f = &g->fifo;
+		u32 v, v1 = 0x33, v2 = 0x55;
+
+		u32 bar1_vaddr = f->userd.gpu_va;
+		volatile u32 *cpu_vaddr = f->userd.cpuva;
+
+		gk20a_dbg_info("test bar1 @ vaddr 0x%x",
+			   bar1_vaddr);
+
+		v = gk20a_bar1_readl(g, bar1_vaddr);
+
+		*cpu_vaddr = v1;
+		smp_mb();
+
+		if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
+			gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
+			return -EINVAL;
+		}
+
+		gk20a_bar1_writel(g, bar1_vaddr, v2);
+
+		if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
+			gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
+			return -EINVAL;
+		}
+
+		/* is it visible to the cpu? */
+		if (*cpu_vaddr != v2) {
+			gk20a_err(dev_from_gk20a(g),
+				"cpu didn't see bar1 write @ %p!",
+				cpu_vaddr);
+		}
+
+		/* put it back */
+		gk20a_bar1_writel(g, bar1_vaddr, v);
+	}
+
+	gk20a_dbg_fn("done");
+
+	return 0;
+}
+
+int vgpu_init_fifo_support(struct gk20a *g)
+{
+	u32 err;
+
+	gk20a_dbg_fn("");
+
+	err = vgpu_init_fifo_setup_sw(g);
+	if (err)
+		return err;
+
+	err = vgpu_init_fifo_setup_hw(g);
+	return err;
+}
+
+static int vgpu_fifo_preempt_channel(struct gk20a *g, u32 hw_chid)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct fifo_gk20a *f = &g->fifo;
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_channel_config_params *p =
+			&msg.params.channel_config;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT;
+	msg.handle = platform->virt_handle;
+	p->handle = f->channel[hw_chid].virt_ctx;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+	if (err || msg.ret) {
+		gk20a_err(dev_from_gk20a(g),
+			"preempt channel %d failed\n", hw_chid);
+		err = -ENOMEM;
+	}
+
+	return err;
+}
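+
+/*
+ * Editor's note: the runlist submission message carries its entries
+ * inline: the tegra_vgpu_cmd_msg header is followed directly by
+ * num_entries u16 channel IDs, so header and payload are allocated and
+ * sent as one contiguous buffer.
+ */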
+static int vgpu_submit_runlist(u64 handle, u8 runlist_id, u16 *runlist,
+			       u32 num_entries)
+{
+	struct tegra_vgpu_cmd_msg *msg;
+	struct tegra_vgpu_runlist_params *p;
+	size_t size = sizeof(*msg) + sizeof(*runlist) * num_entries;
+	char *ptr;
+	int err;
+
+	msg = kmalloc(size, GFP_KERNEL);
+	if (!msg)
+		return -1;
+
+	msg->cmd = TEGRA_VGPU_CMD_SUBMIT_RUNLIST;
+	msg->handle = handle;
+	p = &msg->params.runlist;
+	p->runlist_id = runlist_id;
+	p->num_entries = num_entries;
+
+	ptr = (char *)msg + sizeof(*msg);
+	memcpy(ptr, runlist, sizeof(*runlist) * num_entries);
+	err = vgpu_comm_sendrecv(msg, size, sizeof(*msg));
+
+	err = (err || msg->ret) ? -1 : 0;
+	kfree(msg);
+	return err;
+}
+
+static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
+					u32 hw_chid, bool add,
+					bool wait_for_finish)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct fifo_gk20a *f = &g->fifo;
+	struct fifo_runlist_info_gk20a *runlist;
+	u16 *runlist_entry = NULL;
+	u32 count = 0;
+
+	gk20a_dbg_fn("");
+
+	runlist = &f->runlist_info[runlist_id];
+
+	/* valid channel, add/remove it from active list.
+	   Otherwise, keep active list untouched for suspend/resume. */
+	if (hw_chid != ~0) {
+		if (add) {
+			if (test_and_set_bit(hw_chid,
+				runlist->active_channels) == 1)
+				return 0;
+		} else {
+			if (test_and_clear_bit(hw_chid,
+				runlist->active_channels) == 0)
+				return 0;
+		}
+	}
+
+	if (hw_chid != ~0 || /* add/remove a valid channel */
+	    add /* resume to add all channels back */) {
+		u32 chid;
+
+		runlist_entry = runlist->mem[0].cpuva;
+		for_each_set_bit(chid,
+			runlist->active_channels, f->num_channels) {
+			gk20a_dbg_info("add channel %d to runlist", chid);
+			runlist_entry[0] = chid;
+			runlist_entry++;
+			count++;
+		}
+	} else /* suspend to remove all channels */
+		count = 0;
+
+	return vgpu_submit_runlist(platform->virt_handle, runlist_id,
+				runlist->mem[0].cpuva, count);
+}
+
+/* add/remove a channel from runlist
+   special cases below: runlist->active_channels will NOT be changed.
+   (hw_chid == ~0 && !add) means remove all active channels from runlist.
+   (hw_chid == ~0 && add) means restore all active channels on runlist. */
+static int vgpu_fifo_update_runlist(struct gk20a *g, u32 runlist_id,
+				u32 hw_chid, bool add, bool wait_for_finish)
+{
+	struct fifo_runlist_info_gk20a *runlist = NULL;
+	struct fifo_gk20a *f = &g->fifo;
+	u32 ret = 0;
+
+	gk20a_dbg_fn("");
+
+	runlist = &f->runlist_info[runlist_id];
+
+	mutex_lock(&runlist->mutex);
+
+	ret = vgpu_fifo_update_runlist_locked(g, runlist_id, hw_chid, add,
+					wait_for_finish);
+
+	mutex_unlock(&runlist->mutex);
+	return ret;
+}
+
+static int vgpu_fifo_wait_engine_idle(struct gk20a *g)
+{
+	gk20a_dbg_fn("");
+
+	return 0;
+}
+
+void vgpu_init_fifo_ops(struct gpu_ops *gops)
+{
+	gops->fifo.bind_channel = vgpu_channel_bind;
+	gops->fifo.unbind_channel = vgpu_channel_unbind;
+	gops->fifo.disable_channel = vgpu_channel_disable;
+	gops->fifo.alloc_inst = vgpu_channel_alloc_inst;
+	gops->fifo.free_inst = vgpu_channel_free_inst;
+	gops->fifo.setup_ramfc = vgpu_channel_setup_ramfc;
+	gops->fifo.preempt_channel = vgpu_fifo_preempt_channel;
+	gops->fifo.update_runlist = vgpu_fifo_update_runlist;
+	gops->fifo.wait_engine_idle = vgpu_fifo_wait_engine_idle;
+}
--
cgit v1.2.2