From 2a2c16af5f9f1ccfc93a13e820d5381e5c881e92 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Wed, 18 Apr 2018 12:59:00 -0700
Subject: gpu: nvgpu: Move Linux files away from common

Move all Linux source code files from drivers/gpu/nvgpu/common/linux
to drivers/gpu/nvgpu/os/linux. This changes the meaning of "common"
to OS-independent code.

JIRA NVGPU-598
JIRA NVGPU-601

Change-Id: Ib7f2a43d3688bb0d0b7dcc48469a6783fd988ce9
Signed-off-by: Terje Bergstrom
Reviewed-on: https://git-master.nvidia.com/r/1747714
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/os/linux/ioctl_channel.c | 1388 ++++++++++++++++++++++++++++
 1 file changed, 1388 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_channel.c

diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
new file mode 100644
index 00000000..b04bb9de
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -0,0 +1,1388 @@
+/*
+ * GK20A Graphics channel
+ *
+ * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gk20a/gk20a.h"
+#include "gk20a/dbg_gpu_gk20a.h"
+#include "gk20a/fence_gk20a.h"
+
+#include "platform_gk20a.h"
+#include "ioctl_channel.h"
+#include "channel.h"
+#include "os_linux.h"
+#include "ctxsw_trace.h"
+
+/* the minimal size of the client buffer */
+#define CSS_MIN_CLIENT_SNAPSHOT_SIZE				\
+	(sizeof(struct gk20a_cs_snapshot_fifo) +		\
+	sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)
+
+static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)
+{
+	switch (graphics_preempt_mode) {
+	case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
+		return "WFI";
+	default:
+		return "?";
+	}
+}
+
+static const char *gr_gk20a_compute_preempt_mode_name(u32 compute_preempt_mode)
+{
+	switch (compute_preempt_mode) {
+	case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
+		return "WFI";
+	case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
+		return "CTA";
+	default:
+		return "?";
+	}
+}
+
+static void gk20a_channel_trace_sched_param(
+	void (*trace)(int chid, int tsgid, pid_t pid, u32 timeslice,
+		u32 timeout, const char *interleave,
+		const char *graphics_preempt_mode,
+		const char *compute_preempt_mode),
+	struct channel_gk20a *ch)
+{
+	struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
+
+	if (!tsg)
+		return;
+
+	(trace)(ch->chid, ch->tsgid, ch->pid,
+		tsg_gk20a_from_ch(ch)->timeslice_us,
+		ch->timeout_ms_max,
+		gk20a_fifo_interleave_level_name(tsg->interleave_level),
+		gr_gk20a_graphics_preempt_mode_name(
+			tsg->gr_ctx.graphics_preempt_mode),
+		gr_gk20a_compute_preempt_mode_name(
+			tsg->gr_ctx.compute_preempt_mode));
+}
+
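+/*
+ * Reading aid (editorial, illustrative only): the helper above separates
+ * trace formatting from the tracepoint identity, so the same logic serves
+ * both call sites in this file:
+ *
+ *	gk20a_channel_trace_sched_param(
+ *		trace_gk20a_channel_sched_defaults, ch);
+ *	gk20a_channel_trace_sched_param(
+ *		trace_gk20a_channel_set_timeout, ch);
+ */
+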
+/*
+ * Although channels do have pointers back to the gk20a struct that they were
+ * created under, in cases where the driver is killed that pointer can be bad.
+ * The channel memory can be freed before the release() function for a given
+ * channel is called. This happens when the driver dies and userspace doesn't
+ * get a chance to call release() until after the entire gk20a driver data is
+ * unloaded and freed.
+ */
+struct channel_priv {
+	struct gk20a *g;
+	struct channel_gk20a *c;
+};
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+
+void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch)
+{
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+
+	/* disable existing cyclestats buffer */
+	nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
+	if (priv->cyclestate_buffer_handler) {
+		dma_buf_vunmap(priv->cyclestate_buffer_handler,
+				ch->cyclestate.cyclestate_buffer);
+		dma_buf_put(priv->cyclestate_buffer_handler);
+		priv->cyclestate_buffer_handler = NULL;
+		ch->cyclestate.cyclestate_buffer = NULL;
+		ch->cyclestate.cyclestate_buffer_size = 0;
+	}
+	nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
+}
+
+static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
+			struct nvgpu_cycle_stats_args *args)
+{
+	struct dma_buf *dmabuf;
+	void *virtual_address;
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+
+	/* is it allowed to handle calls for the current GPU? */
+	if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS))
+		return -ENOSYS;
+
+	if (args->dmabuf_fd && !priv->cyclestate_buffer_handler) {
+
+		/* set up new cyclestats buffer */
+		dmabuf = dma_buf_get(args->dmabuf_fd);
+		if (IS_ERR(dmabuf))
+			return PTR_ERR(dmabuf);
+		virtual_address = dma_buf_vmap(dmabuf);
+		if (!virtual_address)
+			return -ENOMEM;
+
+		priv->cyclestate_buffer_handler = dmabuf;
+		ch->cyclestate.cyclestate_buffer = virtual_address;
+		ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
+		return 0;
+
+	} else if (!args->dmabuf_fd && priv->cyclestate_buffer_handler) {
+		gk20a_channel_free_cycle_stats_buffer(ch);
+		return 0;
+
+	} else if (!args->dmabuf_fd && !priv->cyclestate_buffer_handler) {
+		/* no request from GL */
+		return 0;
+
+	} else {
+		pr_err("channel already has cyclestats buffer\n");
+		return -EINVAL;
+	}
+}
+
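+/*
+ * Reading aid (editorial summary of the four cases above):
+ *
+ *	dmabuf_fd	buffer mapped	action
+ *	non-zero	no		map the fd, attach the buffer
+ *	zero		yes		unmap and detach the buffer
+ *	zero		no		nothing to do
+ *	non-zero	yes		-EINVAL (already attached)
+ */
+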
+static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
+{
+	int ret;
+
+	nvgpu_mutex_acquire(&ch->cs_client_mutex);
+	if (ch->cs_client)
+		ret = gr_gk20a_css_flush(ch, ch->cs_client);
+	else
+		ret = -EBADF;
+	nvgpu_mutex_release(&ch->cs_client_mutex);
+
+	return ret;
+}
+
+static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
+				u32 dmabuf_fd,
+				u32 perfmon_id_count,
+				u32 *perfmon_id_start)
+{
+	int ret = 0;
+	struct gk20a *g = ch->g;
+	struct gk20a_cs_snapshot_client_linux *client_linux;
+	struct gk20a_cs_snapshot_client *client;
+
+	nvgpu_mutex_acquire(&ch->cs_client_mutex);
+	if (ch->cs_client) {
+		nvgpu_mutex_release(&ch->cs_client_mutex);
+		return -EEXIST;
+	}
+
+	client_linux = nvgpu_kzalloc(g, sizeof(*client_linux));
+	if (!client_linux) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	client_linux->dmabuf_fd = dmabuf_fd;
+	client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd);
+	if (IS_ERR(client_linux->dma_handler)) {
+		ret = PTR_ERR(client_linux->dma_handler);
+		client_linux->dma_handler = NULL;
+		goto err_free;
+	}
+
+	client = &client_linux->cs_client;
+	client->snapshot_size = client_linux->dma_handler->size;
+	if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
+		ret = -ENOMEM;
+		goto err_put;
+	}
+
+	client->snapshot = (struct gk20a_cs_snapshot_fifo *)
+			dma_buf_vmap(client_linux->dma_handler);
+	if (!client->snapshot) {
+		ret = -ENOMEM;
+		goto err_put;
+	}
+
+	ch->cs_client = client;
+
+	ret = gr_gk20a_css_attach(ch,
+				perfmon_id_count,
+				perfmon_id_start,
+				ch->cs_client);
+
+	nvgpu_mutex_release(&ch->cs_client_mutex);
+
+	return ret;
+
+err_put:
+	dma_buf_put(client_linux->dma_handler);
+err_free:
+	nvgpu_kfree(g, client_linux);
+err:
+	nvgpu_mutex_release(&ch->cs_client_mutex);
+	return ret;
+}
+
+int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
+{
+	int ret;
+	struct gk20a_cs_snapshot_client_linux *client_linux;
+
+	nvgpu_mutex_acquire(&ch->cs_client_mutex);
+	if (!ch->cs_client) {
+		nvgpu_mutex_release(&ch->cs_client_mutex);
+		return 0;
+	}
+
+	client_linux = container_of(ch->cs_client,
+				struct gk20a_cs_snapshot_client_linux,
+				cs_client);
+
+	ret = gr_gk20a_css_detach(ch, ch->cs_client);
+
+	if (client_linux->dma_handler) {
+		if (ch->cs_client->snapshot)
+			dma_buf_vunmap(client_linux->dma_handler,
+					ch->cs_client->snapshot);
+		dma_buf_put(client_linux->dma_handler);
+	}
+
+	ch->cs_client = NULL;
+	nvgpu_kfree(ch->g, client_linux);
+
+	nvgpu_mutex_release(&ch->cs_client_mutex);
+
+	return ret;
+}
+
+static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
+			struct nvgpu_cycle_stats_snapshot_args *args)
+{
+	int ret;
+
+	/* is it allowed to handle calls for the current GPU? */
+	if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT))
+		return -ENOSYS;
+
+	if (!args->dmabuf_fd)
+		return -EINVAL;
+
+	/* handle the command (most frequent cases first) */
+	switch (args->cmd) {
+	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
+		ret = gk20a_flush_cycle_stats_snapshot(ch);
+		args->extra = 0;
+		break;
+
+	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH:
+		ret = gk20a_attach_cycle_stats_snapshot(ch,
+						args->dmabuf_fd,
+						args->extra,
+						&args->extra);
+		break;
+
+	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH:
+		ret = gk20a_channel_free_cycle_stats_snapshot(ch);
+		args->extra = 0;
+		break;
+
+	default:
+		pr_err("cyclestats: unknown command %u\n", args->cmd);
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+#endif
+
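+/*
+ * Reading aid (editorial): in the snapshot ioctl above, args->extra is
+ * used in both directions for ATTACH - it carries the requested perfmon
+ * count in, and gr_gk20a_css_attach() writes the first allocated perfmon
+ * ID back out through the same field. FLUSH and DETACH clear it on the
+ * way out.
+ */
+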
pr_err("gk20a_init_error_notifier: invalid memory handle\n"); + return -EINVAL; + } + + dmabuf = dma_buf_get(args->mem); + + gk20a_channel_free_error_notifiers(ch); + + if (IS_ERR(dmabuf)) { + pr_err("Invalid handle: %d\n", args->mem); + return -EINVAL; + } + + if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) { + dma_buf_put(dmabuf); + nvgpu_err(ch->g, "gk20a_init_error_notifier: invalid offset"); + return -EINVAL; + } + + nvgpu_speculation_barrier(); + + /* map handle */ + va = dma_buf_vmap(dmabuf); + if (!va) { + dma_buf_put(dmabuf); + pr_err("Cannot map notifier handle\n"); + return -ENOMEM; + } + + priv->error_notifier.notification = va + args->offset; + priv->error_notifier.vaddr = va; + memset(priv->error_notifier.notification, 0, + sizeof(struct nvgpu_notification)); + + /* set channel notifiers pointer */ + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + priv->error_notifier.dmabuf = dmabuf; + nvgpu_mutex_release(&priv->error_notifier.mutex); + + return 0; +} + +/* + * This returns the channel with a reference. The caller must + * gk20a_channel_put() the ref back after use. + * + * NULL is returned if the channel was not found. + */ +struct channel_gk20a *gk20a_get_channel_from_file(int fd) +{ + struct channel_gk20a *ch; + struct channel_priv *priv; + struct file *f = fget(fd); + + if (!f) + return NULL; + + if (f->f_op != &gk20a_channel_ops) { + fput(f); + return NULL; + } + + priv = (struct channel_priv *)f->private_data; + ch = gk20a_channel_get(priv->c); + fput(f); + return ch; +} + +int gk20a_channel_release(struct inode *inode, struct file *filp) +{ + struct channel_priv *priv = filp->private_data; + struct channel_gk20a *ch; + struct gk20a *g; + + int err; + + /* We could still end up here even if the channel_open failed, e.g. + * if we ran out of hw channel IDs. 
+int gk20a_channel_release(struct inode *inode, struct file *filp)
+{
+	struct channel_priv *priv = filp->private_data;
+	struct channel_gk20a *ch;
+	struct gk20a *g;
+
+	int err;
+
+	/* We could still end up here even if the channel_open failed, e.g.
+	 * if we ran out of hw channel IDs.
+	 */
+	if (!priv)
+		return 0;
+
+	ch = priv->c;
+	g = priv->g;
+
+	err = gk20a_busy(g);
+	if (err) {
+		nvgpu_err(g, "failed to release a channel!");
+		goto channel_release;
+	}
+
+	trace_gk20a_channel_release(dev_name(dev_from_gk20a(g)));
+
+	gk20a_channel_close(ch);
+	gk20a_channel_free_error_notifiers(ch);
+
+	gk20a_idle(g);
+
+channel_release:
+	gk20a_put(g);
+	nvgpu_kfree(g, filp->private_data);
+	filp->private_data = NULL;
+	return 0;
+}
+
+/* note: runlist_id -1 is a synonym for the ENGINE_GR_GK20A runlist id */
+static int __gk20a_channel_open(struct gk20a *g,
+				struct file *filp, s32 runlist_id)
+{
+	int err;
+	struct channel_gk20a *ch;
+	struct channel_priv *priv;
+
+	nvgpu_log_fn(g, " ");
+
+	g = gk20a_get(g);
+	if (!g)
+		return -ENODEV;
+
+	trace_gk20a_channel_open(dev_name(dev_from_gk20a(g)));
+
+	priv = nvgpu_kzalloc(g, sizeof(*priv));
+	if (!priv) {
+		err = -ENOMEM;
+		goto free_ref;
+	}
+
+	err = gk20a_busy(g);
+	if (err) {
+		nvgpu_err(g, "failed to power on, %d", err);
+		goto fail_busy;
+	}
+	/* All user space channels should be non-privileged */
+	ch = gk20a_open_new_channel(g, runlist_id, false,
+				nvgpu_current_pid(g), nvgpu_current_tid(g));
+	gk20a_idle(g);
+	if (!ch) {
+		nvgpu_err(g,
+			"failed to get f");
+		err = -ENOMEM;
+		goto fail_busy;
+	}
+
+	gk20a_channel_trace_sched_param(
+		trace_gk20a_channel_sched_defaults, ch);
+
+	priv->g = g;
+	priv->c = ch;
+
+	filp->private_data = priv;
+	return 0;
+
+fail_busy:
+	nvgpu_kfree(g, priv);
+free_ref:
+	gk20a_put(g);
+	return err;
+}
+
+int gk20a_channel_open(struct inode *inode, struct file *filp)
+{
+	struct nvgpu_os_linux *l = container_of(inode->i_cdev,
+			struct nvgpu_os_linux, channel.cdev);
+	struct gk20a *g = &l->g;
+	int ret;
+
+	nvgpu_log_fn(g, "start");
+	ret = __gk20a_channel_open(g, filp, -1);
+
+	nvgpu_log_fn(g, "end");
+	return ret;
+}
+
+int gk20a_channel_open_ioctl(struct gk20a *g,
+		struct nvgpu_channel_open_args *args)
+{
+	int err;
+	int fd;
+	struct file *file;
+	char name[64];
+	s32 runlist_id = args->in.runlist_id;
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	err = get_unused_fd_flags(O_RDWR);
+	if (err < 0)
+		return err;
+	fd = err;
+
+	snprintf(name, sizeof(name), "nvhost-%s-fd%d",
+		 dev_name(dev_from_gk20a(g)), fd);
+
+	file = anon_inode_getfile(name, l->channel.cdev.ops, NULL, O_RDWR);
+	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
+		goto clean_up;
+	}
+
+	err = __gk20a_channel_open(g, file, runlist_id);
+	if (err)
+		goto clean_up_file;
+
+	fd_install(fd, file);
+	args->out.channel_fd = fd;
+	return 0;
+
+clean_up_file:
+	fput(file);
+clean_up:
+	put_unused_fd(fd);
+	return err;
+}
+
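+/*
+ * Reading aid (editorial): gk20a_channel_open_ioctl() is careful about fd
+ * lifetime - the fd is reserved first, but only published to userspace
+ * via fd_install() after the channel open has succeeded. On any failure
+ * the unused fd is returned to the pool, so a half-initialized channel fd
+ * never becomes visible to the process.
+ */
+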
+static u32 nvgpu_gpfifo_user_flags_to_common_flags(u32 user_flags)
+{
+	u32 flags = 0;
+
+	if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED)
+		flags |= NVGPU_GPFIFO_FLAGS_SUPPORT_VPR;
+
+	if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC)
+		flags |= NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC;
+
+	if (user_flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE)
+		flags |= NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE;
+
+	return flags;
+}
+
+static void nvgpu_get_gpfifo_ex_args(
+		struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args,
+		struct nvgpu_gpfifo_args *gpfifo_args)
+{
+	gpfifo_args->num_entries = alloc_gpfifo_ex_args->num_entries;
+	gpfifo_args->num_inflight_jobs = alloc_gpfifo_ex_args->num_inflight_jobs;
+	gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags(
+			alloc_gpfifo_ex_args->flags);
+}
+
+static void nvgpu_get_gpfifo_args(
+		struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args,
+		struct nvgpu_gpfifo_args *gpfifo_args)
+{
+	/*
+	 * The kernel can insert one extra gpfifo entry before the
+	 * user-submitted gpfifos and another one after, for internal usage.
+	 * Triple the requested size.
+	 */
+	gpfifo_args->num_entries = alloc_gpfifo_args->num_entries * 3;
+	gpfifo_args->num_inflight_jobs = 0;
+	gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags(
+			alloc_gpfifo_args->flags);
+}
+
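+/*
+ * Worked example (editorial, illustrative only): a legacy
+ * NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO request for 512 entries allocates
+ * 512 * 3 = 1536 entries, leaving headroom for the kernel's extra
+ * entries around every user submission. The newer ALLOC_GPFIFO_EX path
+ * passes num_entries through unchanged and instead requires it to be a
+ * power of two (see the ioctl handler below).
+ */
+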
+static void nvgpu_get_fence_args(
+		struct nvgpu_fence *fence_args_in,
+		struct nvgpu_channel_fence *fence_args_out)
+{
+	fence_args_out->id = fence_args_in->id;
+	fence_args_out->value = fence_args_in->value;
+}
+
+static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
+					ulong id, u32 offset,
+					u32 payload, u32 timeout)
+{
+	struct dma_buf *dmabuf;
+	void *data;
+	u32 *semaphore;
+	int ret = 0;
+
+	/* do not wait if channel has timed out */
+	if (ch->has_timedout)
+		return -ETIMEDOUT;
+
+	dmabuf = dma_buf_get(id);
+	if (IS_ERR(dmabuf)) {
+		nvgpu_err(ch->g, "invalid notifier nvmap handle 0x%lx", id);
+		return -EINVAL;
+	}
+
+	data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
+	if (!data) {
+		nvgpu_err(ch->g, "failed to map notifier memory");
+		ret = -EINVAL;
+		goto cleanup_put;
+	}
+
+	semaphore = data + (offset & ~PAGE_MASK);
+
+	ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
+			&ch->semaphore_wq,
+			*semaphore == payload || ch->has_timedout,
+			timeout);
+
+	dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
+cleanup_put:
+	dma_buf_put(dmabuf);
+	return ret;
+}
+
+static int gk20a_channel_wait(struct channel_gk20a *ch,
+			      struct nvgpu_wait_args *args)
+{
+	struct dma_buf *dmabuf;
+	struct gk20a *g = ch->g;
+	struct notification *notif;
+	struct timespec tv;
+	u64 jiffies;
+	ulong id;
+	u32 offset;
+	int remain, ret = 0;
+	u64 end;
+
+	nvgpu_log_fn(g, " ");
+
+	if (ch->has_timedout)
+		return -ETIMEDOUT;
+
+	switch (args->type) {
+	case NVGPU_WAIT_TYPE_NOTIFIER:
+		id = args->condition.notifier.dmabuf_fd;
+		offset = args->condition.notifier.offset;
+		end = offset + sizeof(struct notification);
+
+		dmabuf = dma_buf_get(id);
+		if (IS_ERR(dmabuf)) {
+			nvgpu_err(g, "invalid notifier nvmap handle 0x%lx",
+				  id);
+			return -EINVAL;
+		}
+
+		if (end > dmabuf->size || end < sizeof(struct notification)) {
+			dma_buf_put(dmabuf);
+			nvgpu_err(g, "invalid notifier offset");
+			return -EINVAL;
+		}
+
+		nvgpu_speculation_barrier();
+
+		notif = dma_buf_vmap(dmabuf);
+		if (!notif) {
+			nvgpu_err(g, "failed to map notifier memory");
+			return -ENOMEM;
+		}
+
+		notif = (struct notification *)((uintptr_t)notif + offset);
+
+		/* user should set status pending before
+		 * calling this ioctl */
+		remain = NVGPU_COND_WAIT_INTERRUPTIBLE(
+				&ch->notifier_wq,
+				notif->status == 0 || ch->has_timedout,
+				args->timeout);
+
+		if (remain == 0 && notif->status != 0) {
+			ret = -ETIMEDOUT;
+			goto notif_clean_up;
+		} else if (remain < 0) {
+			ret = -EINTR;
+			goto notif_clean_up;
+		}
+
+		/* TBD: fill in correct information */
+		jiffies = get_jiffies_64();
+		jiffies_to_timespec(jiffies, &tv);
+		notif->timestamp.nanoseconds[0] = tv.tv_nsec;
+		notif->timestamp.nanoseconds[1] = tv.tv_sec;
+		notif->info32 = 0xDEADBEEF; /* should be object name */
+		notif->info16 = ch->chid; /* should be method offset */
+
+notif_clean_up:
+		dma_buf_vunmap(dmabuf, notif);
+		return ret;
+
+	case NVGPU_WAIT_TYPE_SEMAPHORE:
+		ret = gk20a_channel_wait_semaphore(ch,
+				args->condition.semaphore.dmabuf_fd,
+				args->condition.semaphore.offset,
+				args->condition.semaphore.payload,
+				args->timeout);
+
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
+			    struct nvgpu_zcull_bind_args *args)
+{
+	struct gk20a *g = ch->g;
+	struct gr_gk20a *gr = &g->gr;
+
+	nvgpu_log_fn(gr->g, " ");
+
+	return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
+				args->gpu_va, args->mode);
+}
+
+static int gk20a_ioctl_channel_submit_gpfifo(
+	struct channel_gk20a *ch,
+	struct nvgpu_submit_gpfifo_args *args)
+{
+	struct nvgpu_channel_fence fence;
+	struct gk20a_fence *fence_out;
+	struct fifo_profile_gk20a *profile = NULL;
+	u32 submit_flags = 0;
+	int fd = -1;
+	struct gk20a *g = ch->g;
+
+	int ret = 0;
+	nvgpu_log_fn(g, " ");
+
+	profile = gk20a_fifo_profile_acquire(ch->g);
+	gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY);
+
+	if (ch->has_timedout)
+		return -ETIMEDOUT;
+
+	nvgpu_get_fence_args(&args->fence, &fence);
+	submit_flags =
+		nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags);
+
+	/* Try and allocate an fd here */
+	if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
+		&& (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) {
+		fd = get_unused_fd_flags(O_RDWR);
+		if (fd < 0)
+			return fd;
+	}
+
+	ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
+					  submit_flags, &fence,
+					  &fence_out, profile);
+
+	if (ret) {
+		if (fd != -1)
+			put_unused_fd(fd);
+		goto clean_up;
+	}
+
+	/* Convert fence_out to something we can pass back to user space. */
+	if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
+		if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
+			ret = gk20a_fence_install_fd(fence_out, fd);
+			if (ret)
+				put_unused_fd(fd);
+			else
+				args->fence.id = fd;
+		} else {
+			args->fence.id = fence_out->syncpt_id;
+			args->fence.value = fence_out->syncpt_value;
+		}
+	}
+	gk20a_fence_put(fence_out);
+
+	gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_EXIT);
+	if (profile)
+		gk20a_fifo_profile_release(ch->g, profile);
+
+clean_up:
+	return ret;
+}
+
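+/*
+ * Reading aid (editorial): the submit path returns the post-submit fence
+ * to userspace in one of two forms, selected by flags. With FENCE_GET |
+ * SYNC_FENCE the fence is installed as a sync-fence fd (args->fence.id);
+ * with FENCE_GET alone userspace receives a raw syncpoint id/value pair
+ * it can wait on directly.
+ */
+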
+/*
+ * Convert Linux-specific runlist levels of the form
+ * NVGPU_RUNLIST_INTERLEAVE_LEVEL_* to common runlist levels of the form
+ * NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_*.
+ */
+u32 nvgpu_get_common_runlist_level(u32 level)
+{
+	switch (level) {
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
+		return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW;
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
+		return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM;
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH:
+		return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH;
+	default:
+		pr_err("%s: incorrect runlist level\n", __func__);
+	}
+
+	return level;
+}
+
+static u32 nvgpu_obj_ctx_user_flags_to_common_flags(u32 user_flags)
+{
+	u32 flags = 0;
+
+	if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP)
+		flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP;
+
+	if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_CILP)
+		flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP;
+
+	return flags;
+}
+
+static int nvgpu_ioctl_channel_alloc_obj_ctx(struct channel_gk20a *ch,
+		u32 class_num, u32 user_flags)
+{
+	return ch->g->ops.gr.alloc_obj_ctx(ch, class_num,
+			nvgpu_obj_ctx_user_flags_to_common_flags(user_flags));
+}
+
+/*
+ * Convert common preemption mode flags of the form
+ * NVGPU_PREEMPTION_MODE_GRAPHICS_* into Linux preemption mode flags of
+ * the form NVGPU_GRAPHICS_PREEMPTION_MODE_*.
+ */
+u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags)
+{
+	u32 flags = 0;
+
+	if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_WFI)
+		flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_WFI;
+	if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP)
+		flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
+
+	return flags;
+}
+
+/*
+ * Convert common preemption mode flags of the form
+ * NVGPU_PREEMPTION_MODE_COMPUTE_* into Linux preemption mode flags of
+ * the form NVGPU_COMPUTE_PREEMPTION_MODE_*.
+ */
+u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags)
+{
+	u32 flags = 0;
+
+	if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_WFI)
+		flags |= NVGPU_COMPUTE_PREEMPTION_MODE_WFI;
+	if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CTA)
+		flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CTA;
+	if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CILP)
+		flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
+
+	return flags;
+}
+
+/*
+ * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_*
+ * into Linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_*.
+ */
+u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode)
+{
+	switch (graphics_preempt_mode) {
+	case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
+		return NVGPU_GRAPHICS_PREEMPTION_MODE_WFI;
+	case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
+		return NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
+	}
+
+	return graphics_preempt_mode;
+}
+
+/*
+ * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_*
+ * into Linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_*.
+ */
+u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode)
+{
+	switch (compute_preempt_mode) {
+	case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
+		return NVGPU_COMPUTE_PREEMPTION_MODE_WFI;
+	case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
+		return NVGPU_COMPUTE_PREEMPTION_MODE_CTA;
+	case NVGPU_PREEMPTION_MODE_COMPUTE_CILP:
+		return NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
+	}
+
+	return compute_preempt_mode;
+}
+
+/*
+ * Convert Linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_*
+ * into common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_*.
+ */
+static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode)
+{
+	switch (graphics_preempt_mode) {
+	case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI:
+		return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI;
+	case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP:
+		return NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
+	}
+
+	return graphics_preempt_mode;
+}
+
+/*
+ * Convert Linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_*
+ * into common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_*.
+ */
+static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode)
+{
+	switch (compute_preempt_mode) {
+	case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
+		return NVGPU_PREEMPTION_MODE_COMPUTE_WFI;
+	case NVGPU_COMPUTE_PREEMPTION_MODE_CTA:
+		return NVGPU_PREEMPTION_MODE_COMPUTE_CTA;
+	case NVGPU_COMPUTE_PREEMPTION_MODE_CILP:
+		return NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
+	}
+
+	return compute_preempt_mode;
+}
+
+static int nvgpu_ioctl_channel_set_preemption_mode(struct channel_gk20a *ch,
+		u32 graphics_preempt_mode, u32 compute_preempt_mode)
+{
+	int err;
+
+	if (ch->g->ops.gr.set_preemption_mode) {
+		err = gk20a_busy(ch->g);
+		if (err) {
+			nvgpu_err(ch->g, "failed to power on, %d", err);
+			return err;
+		}
+		err = ch->g->ops.gr.set_preemption_mode(ch,
+			nvgpu_get_common_graphics_preempt_mode(graphics_preempt_mode),
+			nvgpu_get_common_compute_preempt_mode(compute_preempt_mode));
+		gk20a_idle(ch->g);
+	} else {
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
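+/*
+ * Illustrative example (editorial): a userspace request for
+ * NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP plus NVGPU_COMPUTE_PREEMPTION_MODE_CILP
+ * is translated by the two helpers above into the common pair
+ * NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP / NVGPU_PREEMPTION_MODE_COMPUTE_CILP
+ * before reaching g->ops.gr.set_preemption_mode(); unrecognized values
+ * pass through unchanged.
+ */
+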
+static int nvgpu_ioctl_channel_get_user_syncpoint(struct channel_gk20a *ch,
+		struct nvgpu_get_user_syncpoint_args *args)
+{
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+	struct gk20a *g = ch->g;
+	int err;
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT)) {
+		nvgpu_err(g, "user syncpoints not supported");
+		return -EINVAL;
+	}
+
+	if (!gk20a_platform_has_syncpoints(g)) {
+		nvgpu_err(g, "syncpoints not supported");
+		return -EINVAL;
+	}
+
+	if (g->aggressive_sync_destroy_thresh) {
+		nvgpu_err(g, "sufficient syncpoints not available");
+		return -EINVAL;
+	}
+
+	nvgpu_mutex_acquire(&ch->sync_lock);
+	if (ch->user_sync) {
+		nvgpu_mutex_release(&ch->sync_lock);
+	} else {
+		ch->user_sync = gk20a_channel_sync_create(ch, true);
+		if (!ch->user_sync) {
+			nvgpu_mutex_release(&ch->sync_lock);
+			return -ENOMEM;
+		}
+		nvgpu_mutex_release(&ch->sync_lock);
+
+		if (g->ops.fifo.resetup_ramfc) {
+			err = g->ops.fifo.resetup_ramfc(ch);
+			if (err)
+				return err;
+		}
+	}
+
+	args->syncpoint_id = ch->user_sync->syncpt_id(ch->user_sync);
+	args->syncpoint_max = nvgpu_nvhost_syncpt_read_maxval(g->nvhost_dev,
+			args->syncpoint_id);
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS))
+		args->gpu_va = ch->user_sync->syncpt_address(ch->user_sync);
+	else
+		args->gpu_va = 0;
+
+	return 0;
+#else
+	return -EINVAL;
+#endif
+}
+
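+/*
+ * Reading aid (editorial): the user syncpoint above is created lazily on
+ * first request and then cached in ch->user_sync, so repeated
+ * GET_USER_SYNCPOINT calls on the same channel report the same syncpoint.
+ * RAMFC is re-set-up only on the creation path, where the channel gains
+ * its new syncpoint.
+ */
+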
+long gk20a_channel_ioctl(struct file *filp,
+	unsigned int cmd, unsigned long arg)
+{
+	struct channel_priv *priv = filp->private_data;
+	struct channel_gk20a *ch = priv->c;
+	struct device *dev = dev_from_gk20a(ch->g);
+	u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE] = {0};
+	int err = 0;
+	struct gk20a *g = ch->g;
+
+	nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));
+
+	if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
+		(_IOC_NR(cmd) == 0) ||
+		(_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
+		(_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
+		return -EINVAL;
+
+	if (_IOC_DIR(cmd) & _IOC_WRITE) {
+		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
+			return -EFAULT;
+	}
+
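+	/*
+	 * Argument marshalling note (editorial): write-type commands have
+	 * their args copied into buf here, and read-type commands are
+	 * copied back out after the switch below. buf was zero-initialized
+	 * above, so read-only commands start from zeroed arguments.
+	 */
+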
+	/* take a ref or return timeout if channel refs can't be taken */
+	ch = gk20a_channel_get(ch);
+	if (!ch)
+		return -ETIMEDOUT;
+
+	/* protect our sanity for threaded userspace - most of the channel is
+	 * not thread safe */
+	nvgpu_mutex_acquire(&ch->ioctl_lock);
+
+	/* this ioctl call keeps a ref to the file which keeps a ref to the
+	 * channel */
+
+	switch (cmd) {
+	case NVGPU_IOCTL_CHANNEL_OPEN:
+		err = gk20a_channel_open_ioctl(ch->g,
+			(struct nvgpu_channel_open_args *)buf);
+		break;
+	case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
+		break;
+	case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
+	{
+		struct nvgpu_alloc_obj_ctx_args *args =
+			(struct nvgpu_alloc_obj_ctx_args *)buf;
+
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = nvgpu_ioctl_channel_alloc_obj_ctx(ch, args->class_num, args->flags);
+		gk20a_idle(ch->g);
+		break;
+	}
+	case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX:
+	{
+		struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args =
+			(struct nvgpu_alloc_gpfifo_ex_args *)buf;
+		struct nvgpu_gpfifo_args gpfifo_args;
+
+		nvgpu_get_gpfifo_ex_args(alloc_gpfifo_ex_args, &gpfifo_args);
+
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+
+		if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) {
+			err = -EINVAL;
+			gk20a_idle(ch->g);
+			break;
+		}
+		err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args);
+		gk20a_idle(ch->g);
+		break;
+	}
+	case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
+	{
+		struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args =
+			(struct nvgpu_alloc_gpfifo_args *)buf;
+		struct nvgpu_gpfifo_args gpfifo_args;
+
+		nvgpu_get_gpfifo_args(alloc_gpfifo_args, &gpfifo_args);
+
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+
+		err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args);
+		gk20a_idle(ch->g);
+		break;
+	}
+	case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
+		err = gk20a_ioctl_channel_submit_gpfifo(ch,
+				(struct nvgpu_submit_gpfifo_args *)buf);
+		break;
+	case NVGPU_IOCTL_CHANNEL_WAIT:
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+
+		/* waiting is thread-safe, not dropping this mutex could
+		 * deadlock in certain conditions */
+		nvgpu_mutex_release(&ch->ioctl_lock);
+
+		err = gk20a_channel_wait(ch,
+				(struct nvgpu_wait_args *)buf);
+
+		nvgpu_mutex_acquire(&ch->ioctl_lock);
+
+		gk20a_idle(ch->g);
+		break;
+	case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = gk20a_channel_zcull_bind(ch,
+				(struct nvgpu_zcull_bind_args *)buf);
+		gk20a_idle(ch->g);
+		break;
+	case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = gk20a_init_error_notifier(ch,
+				(struct nvgpu_set_error_notifier *)buf);
+		gk20a_idle(ch->g);
+		break;
+#ifdef CONFIG_GK20A_CYCLE_STATS
+	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = gk20a_channel_cycle_stats(ch,
+				(struct nvgpu_cycle_stats_args *)buf);
+		gk20a_idle(ch->g);
+		break;
+#endif
+	case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
+	{
+		u32 timeout =
+			(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
+		nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
+			   timeout, ch->chid);
+		ch->timeout_ms_max = timeout;
+		gk20a_channel_trace_sched_param(
+			trace_gk20a_channel_set_timeout, ch);
+		break;
+	}
+	case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
+	{
+		u32 timeout =
+			(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
+		bool timeout_debug_dump = !((u32)
+			((struct nvgpu_set_timeout_ex_args *)buf)->flags &
+			(1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
+		nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
+			   timeout, ch->chid);
+		ch->timeout_ms_max = timeout;
+		ch->timeout_debug_dump = timeout_debug_dump;
+		gk20a_channel_trace_sched_param(
+			trace_gk20a_channel_set_timeout, ch);
+		break;
+	}
+	case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
+		((struct nvgpu_get_param_args *)buf)->value =
+			ch->has_timedout;
+		break;
+	case NVGPU_IOCTL_CHANNEL_ENABLE:
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		if (ch->g->ops.fifo.enable_channel)
+			ch->g->ops.fifo.enable_channel(ch);
+		else
+			err = -ENOSYS;
+		gk20a_idle(ch->g);
+		break;
+	case NVGPU_IOCTL_CHANNEL_DISABLE:
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		if (ch->g->ops.fifo.disable_channel)
+			ch->g->ops.fifo.disable_channel(ch);
+		else
+			err = -ENOSYS;
+		gk20a_idle(ch->g);
+		break;
+	case NVGPU_IOCTL_CHANNEL_PREEMPT:
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = gk20a_fifo_preempt(ch->g, ch);
+		gk20a_idle(ch->g);
+		break;
+	case NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST:
+		if (!capable(CAP_SYS_NICE)) {
+			err = -EPERM;
+			break;
+		}
+		if (!ch->g->ops.fifo.reschedule_runlist) {
+			err = -ENOSYS;
+			break;
+		}
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = ch->g->ops.fifo.reschedule_runlist(ch,
+			NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT &
+			((struct nvgpu_reschedule_runlist_args *)buf)->flags);
+		gk20a_idle(ch->g);
+		break;
+	case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = ch->g->ops.fifo.force_reset_ch(ch,
+			NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true);
+		gk20a_idle(ch->g);
+		break;
+#ifdef CONFIG_GK20A_CYCLE_STATS
+	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT:
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = gk20a_channel_cycle_stats_snapshot(ch,
+				(struct nvgpu_cycle_stats_snapshot_args *)buf);
+		gk20a_idle(ch->g);
+		break;
+#endif
+	case NVGPU_IOCTL_CHANNEL_WDT:
+		err = gk20a_channel_set_wdt_status(ch,
+				(struct nvgpu_channel_wdt_args *)buf);
+		break;
+	case NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE:
+		err = nvgpu_ioctl_channel_set_preemption_mode(ch,
+			((struct nvgpu_preemption_mode_args *)buf)->graphics_preempt_mode,
+			((struct nvgpu_preemption_mode_args *)buf)->compute_preempt_mode);
+		break;
+	case NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX:
+		if (ch->g->ops.gr.set_boosted_ctx) {
+			bool boost =
+				((struct nvgpu_boosted_ctx_args *)buf)->boost;
+
+			err = gk20a_busy(ch->g);
+			if (err) {
+				dev_err(dev,
+					"%s: failed to host gk20a for ioctl cmd: 0x%x",
+					__func__, cmd);
+				break;
+			}
+			err = ch->g->ops.gr.set_boosted_ctx(ch, boost);
+			gk20a_idle(ch->g);
+		} else {
+			err = -EINVAL;
+		}
+		break;
+	case NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT:
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = nvgpu_ioctl_channel_get_user_syncpoint(ch,
+				(struct nvgpu_get_user_syncpoint_args *)buf);
+		gk20a_idle(ch->g);
+		break;
+	default:
+		dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd);
+		err = -ENOTTY;
+		break;
+	}
+
+	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
+		err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
+
+	nvgpu_mutex_release(&ch->ioctl_lock);
+
+	gk20a_channel_put(ch);
+
+	nvgpu_log_fn(g, "end");
+
+	return err;
+}
--
cgit v1.2.2