From c8ffe0fdecfa110a9f9beb1b7e0298d3c3c64cc2 Mon Sep 17 00:00:00 2001
From: Thomas Fleury
Date: Tue, 10 May 2016 09:05:45 -0700
Subject: gpu: nvgpu: add sched control API

Added a dedicated device node to allow an app manager to control TSG
scheduling parameters:
- Get list of TSGs
- Get list of recent TSGs
- Get list of TSGs per pid
- Get TSG current scheduling parameters
- Set TSG timeslice
- Set TSG runlist interleave

Jira VFND-1586

Change-Id: I014c9d1534bce0eaea6c25ad114cf0cff317af79
Signed-off-by: Thomas Fleury
Reviewed-on: http://git-master/r/1160384
(cherry picked from commit 75ca739517cc7f7f76714b5f6a1a57c39b8cb38e)
Reviewed-on: http://git-master/r/1167021
Reviewed-by: Richard Zhao
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman
Reviewed-by: Vijayakumar Subbu
---
 drivers/gpu/nvgpu/Makefile                  |   1 +
 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c |  11 +-
 drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c  |   7 +-
 drivers/gpu/nvgpu/gk20a/gk20a.c             |  32 ++
 drivers/gpu/nvgpu/gk20a/gk20a.h             |   9 +
 drivers/gpu/nvgpu/gk20a/sched_gk20a.c       | 603 ++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/sched_gk20a.h       |  52 +++
 drivers/gpu/nvgpu/gk20a/tsg_gk20a.c         | 115 ++++--
 drivers/gpu/nvgpu/gk20a/tsg_gk20a.h         |   4 +
 drivers/gpu/nvgpu/vgpu/vgpu.c               |   2 +
 10 files changed, 801 insertions(+), 35 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gk20a/sched_gk20a.c
 create mode 100644 drivers/gpu/nvgpu/gk20a/sched_gk20a.h

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 1bc2b9cc..0fdd2e28 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_GK20A) := nvgpu.o
 
 nvgpu-y := \
 	gk20a/gk20a.o \
+	gk20a/sched_gk20a.o \
 	gk20a/as_gk20a.o \
 	gk20a/ctrl_gk20a.o \
 	gk20a/ce2_gk20a.o \
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index d435bf79..d43c06be 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -677,22 +677,13 @@ void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
 		.vmid = 0,
 		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
 		.context_id = 0,
-		.pid = 0,
+		.pid = tsg->tgid,
 	};
-	struct channel_gk20a *ch;
 
 	if (!g->ctxsw_trace)
 		return;
 
 	g->ops.read_ptimer(g, &entry.timestamp);
-	mutex_lock(&tsg->ch_list_lock);
-	if (!list_empty(&tsg->ch_list)) {
-		ch = list_entry(tsg->ch_list.next,
-			struct channel_gk20a, ch_entry);
-		entry.pid = ch->pid;
-	}
-	mutex_unlock(&tsg->ch_list_lock);
-
 	gk20a_ctxsw_trace_write(g, &entry);
 	gk20a_ctxsw_trace_wake_up(g, 0);
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 69e2b409..15e645f2 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -596,6 +596,7 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 	struct gk20a_fecs_trace *trace = g->fecs_trace;
 	struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
 	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch);
+	pid_t pid;
 
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
 			"hw_chid=%d context_ptr=%x inst_block=%llx",
@@ -630,7 +631,11 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 			GK20A_FECS_TRACE_NUM_RECORDS));
 
 	gk20a_mem_end(g, mem);
 
-	gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid);
+	if (gk20a_is_channel_marked_as_tsg(ch))
+		pid = tsg_gk20a_from_ch(ch)->tgid;
+	else
+		pid = ch->pid;
+	gk20a_fecs_trace_hash_add(g, context_ptr, pid);
 
 	return 0;
 }
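[Editor's sketch] The ioctls listed in the commit message map onto a simple user-space flow. The sketch below shows how an app manager might discover the TSG bitmap size, then fetch the active-TSG bitmap. The device node path is an assumption derived from the "-sched" suffix registered in gk20a.c below; the NVGPU_SCHED_IOCTL_* numbers and argument structs live in the uapi header that pairs with this patch and are not shown in this diff.

	#include <errno.h>
	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/nvgpu.h>	/* assumed location of NVGPU_SCHED_IOCTL_* */

	int main(void)
	{
		struct nvgpu_sched_get_tsgs_args args = { 0 };
		uint64_t *bitmap;
		uint32_t i, n;
		int fd = open("/dev/nvhost-sched-gpu", O_RDWR);	/* assumed node name */

		if (fd < 0)
			return 1;

		/* Pass no buffer: the driver replies -ENOSPC but copies back
		 * args with args.size set to the bitmap size (see the ioctl
		 * dispatch comment in sched_gk20a.c below). */
		if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args) < 0 &&
		    errno != ENOSPC) {
			perror("GET_TSGS (size query)");
			return 1;
		}

		bitmap = calloc(1, args.size);
		args.buffer = (uintptr_t)bitmap;
		if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args) < 0) {
			perror("GET_TSGS");
			return 1;
		}

		n = args.size * 8;	/* one bit per TSG id */
		for (i = 0; i < n; i++)
			if (bitmap[i / 64] & (1ULL << (i % 64)))
				printf("TSG %u is active\n", i);

		free(bitmap);
		close(fd);
		return 0;
	}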
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 9255c847..822cd3ff 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -246,6 +246,18 @@ static const struct file_operations gk20a_ctxsw_ops = {
 	.mmap = gk20a_ctxsw_dev_mmap,
 };
 
+static const struct file_operations gk20a_sched_ops = {
+	.owner = THIS_MODULE,
+	.release = gk20a_sched_dev_release,
+	.open = gk20a_sched_dev_open,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = gk20a_sched_dev_ioctl,
+#endif
+	.unlocked_ioctl = gk20a_sched_dev_ioctl,
+	.poll = gk20a_sched_dev_poll,
+	.read = gk20a_sched_dev_read,
+};
+
 static inline void sim_writel(struct gk20a *g, u32 r, u32 v)
 {
 	writel(v, g->sim.regs+r);
@@ -965,6 +977,12 @@ int gk20a_pm_finalize_poweron(struct device *dev)
 	if (err)
 		gk20a_warn(dev, "could not initialize ctxsw tracing");
 
+	err = gk20a_sched_ctrl_init(g);
+	if (err) {
+		gk20a_err(dev, "failed to init sched control");
+		goto done;
+	}
+
 	/* Restore the debug setting */
 	g->ops.mm.set_debug_mode(g, g->mmu_debug_ctrl);
 
@@ -1101,6 +1119,11 @@ void gk20a_user_deinit(struct device *dev, struct class *class)
 		cdev_del(&g->ctxsw.cdev);
 	}
 
+	if (g->sched.node) {
+		device_destroy(&nvgpu_class, g->sched.cdev.dev);
+		cdev_del(&g->sched.cdev);
+	}
+
 	if (g->cdev_region)
 		unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS);
 }
@@ -1170,6 +1193,12 @@ int gk20a_user_init(struct device *dev, const char *interface_name,
 			goto fail;
 #endif
 
+	err = gk20a_create_device(dev, devno++, interface_name, "-sched",
+				  &g->sched.cdev, &g->sched.node,
+				  &gk20a_sched_ops,
+				  class);
+	if (err)
+		goto fail;
 
 	return 0;
 fail:
@@ -1632,6 +1661,7 @@ static int gk20a_probe(struct platform_device *dev)
 		gk20a_alloc_debugfs_init(dev);
 		gk20a_mm_debugfs_init(&dev->dev);
 		gk20a_fifo_debugfs_init(&dev->dev);
+		gk20a_sched_debugfs_init(&dev->dev);
 #endif
 
 	gk20a_init_gr(gk20a);
@@ -1655,6 +1685,8 @@ static int __exit gk20a_remove(struct platform_device *pdev)
 
 	gk20a_ctxsw_trace_cleanup(g);
 
+	gk20a_sched_ctrl_cleanup(g);
+
 	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
 		gk20a_scale_exit(dev);
 
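[Editor's sketch] The fops wired above give the node event semantics: read() blocks until sched->status becomes non-zero and clears it, and poll() reports POLLIN while an event is pending (see gk20a_sched_dev_read/poll in sched_gk20a.c below). A minimal event loop, under the same uapi assumptions as the previous sketch:

	#include <poll.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <linux/nvgpu.h>	/* assumed: nvgpu_sched_event_arg, status bits */

	static void wait_for_tsg_events(int fd)
	{
		struct nvgpu_sched_event_arg event;	/* .reserved, .status */
		struct pollfd pfd = { .fd = fd, .events = POLLIN };

		for (;;) {
			if (poll(&pfd, 1, -1) < 0)
				break;
			/* On success the driver returns sizeof(event). */
			if (read(fd, &event, sizeof(event)) != sizeof(event))
				break;
			if (event.status & NVGPU_SCHED_STATUS_TSG_OPEN)
				printf("new TSG(s); query GET_RECENT_TSGS\n");
		}
	}

Opening the fd with O_NONBLOCK instead makes read() return -EAGAIN when no event is pending, which suits a select/poll-driven manager.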
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index c5da68cc..8aa8689b 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -52,6 +52,7 @@ struct acr_desc;
 #include "acr.h"
 #include "cde_gk20a.h"
 #include "debug_gk20a.h"
+#include "sched_gk20a.h"
 
 /* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds.
    32 ns is the resolution of ptimer. */
@@ -817,6 +818,11 @@ struct gk20a {
 		struct device *node;
 	} ctxsw;
 
+	struct {
+		struct cdev cdev;
+		struct device *node;
+	} sched;
+
 	struct mutex client_lock;
 	int client_refcount; /* open channels and ctrl nodes */
 
@@ -847,6 +853,8 @@ struct gk20a {
 	struct gk20a_ctxsw_trace *ctxsw_trace;
 	struct gk20a_fecs_trace *fecs_trace;
 
+	struct gk20a_sched_ctrl sched_ctrl;
+
 	struct device_dma_parameters dma_parms;
 
 	struct gk20a_cde_app cde_app;
@@ -925,6 +933,7 @@ enum gk20a_dbg_categories {
 	gpu_dbg_cde = BIT(10), /* cde info messages */
 	gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */
 	gpu_dbg_ctxsw = BIT(12), /* ctxsw tracing */
+	gpu_dbg_sched = BIT(13), /* sched control tracing */
 	gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
 };
 
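[Editor's sketch] The new sched code below leans on NVGPU_SCHED_SET/ISSET/CLR helpers that come from the uapi header, not from this diff. Given that the bitmaps are arrays of u64 indexed by TSG id, they presumably reduce to bit operations along these lines (an assumption, not the header's actual text):

	#define NVGPU_SCHED_SET(tsgid, bitmap) \
		((bitmap)[(tsgid) / 64] |= (1ULL << ((tsgid) % 64)))
	#define NVGPU_SCHED_CLR(tsgid, bitmap) \
		((bitmap)[(tsgid) / 64] &= ~(1ULL << ((tsgid) % 64)))
	#define NVGPU_SCHED_ISSET(tsgid, bitmap) \
		((bitmap)[(tsgid) / 64] & (1ULL << ((tsgid) % 64)))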
diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
new file mode 100644
index 00000000..bcbbbe8b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
@@ -0,0 +1,603 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/wait.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <uapi/linux/nvgpu.h>
+#include "ctxsw_trace_gk20a.h"
+#include "gk20a.h"
+#include "gr_gk20a.h"
+#include "hw_ctxsw_prog_gk20a.h"
+#include "hw_gr_gk20a.h"
+#include "sched_gk20a.h"
+
+ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf,
+	size_t size, loff_t *off)
+{
+	struct gk20a_sched_ctrl *sched = filp->private_data;
+	struct nvgpu_sched_event_arg event = { 0 };
+	int err;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched,
+		"filp=%p buf=%p size=%zu", filp, buf, size);
+
+	if (size < sizeof(event))
+		return -EINVAL;
+	size = sizeof(event);
+
+	mutex_lock(&sched->status_lock);
+	while (!sched->status) {
+		mutex_unlock(&sched->status_lock);
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+		err = wait_event_interruptible(sched->readout_wq,
+			sched->status);
+		if (err)
+			return err;
+		mutex_lock(&sched->status_lock);
+	}
+
+	event.reserved = 0;
+	event.status = sched->status;
+
+	if (copy_to_user(buf, &event, size)) {
+		mutex_unlock(&sched->status_lock);
+		return -EFAULT;
+	}
+
+	sched->status = 0;
+
+	mutex_unlock(&sched->status_lock);
+
+	return size;
+}
+
+unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait)
+{
+	struct gk20a_sched_ctrl *sched = filp->private_data;
+	unsigned int mask = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "");
+
+	mutex_lock(&sched->status_lock);
+	poll_wait(filp, &sched->readout_wq, wait);
+	if (sched->status)
+		mask |= POLLIN | POLLRDNORM;
+	mutex_unlock(&sched->status_lock);
+
+	return mask;
+}
+
+static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a_sched_ctrl *sched,
+	struct nvgpu_sched_get_tsgs_args *arg)
+{
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx",
+		arg->size, arg->buffer);
+
+	if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
+		arg->size = sched->bitmap_size;
+		return -ENOSPC;
+	}
+
+	mutex_lock(&sched->status_lock);
+	if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
+		sched->active_tsg_bitmap, sched->bitmap_size)) {
+		mutex_unlock(&sched->status_lock);
+		return -EFAULT;
+	}
+
+	memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size);
+	mutex_unlock(&sched->status_lock);
+
+	return 0;
+}
+
+static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a_sched_ctrl *sched,
+	struct nvgpu_sched_get_tsgs_args *arg)
+{
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx",
+		arg->size, arg->buffer);
+
+	if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
+		arg->size = sched->bitmap_size;
+		return -ENOSPC;
+	}
+
+	mutex_lock(&sched->status_lock);
+	if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
+		sched->recent_tsg_bitmap, sched->bitmap_size)) {
+		mutex_unlock(&sched->status_lock);
+		return -EFAULT;
+	}
+
+	memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size);
+	mutex_unlock(&sched->status_lock);
+
+	return 0;
+}
+
+static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched,
+	struct nvgpu_sched_get_tsgs_by_pid_args *arg)
+{
+	struct fifo_gk20a *f = &sched->g->fifo;
+	struct tsg_gk20a *tsg;
+	u64 *bitmap;
+	int tsgid;
+	/* pid at user level corresponds to kernel tgid */
+	pid_t tgid = (pid_t)arg->pid;
+	int err = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "pid=%d size=%u buffer=%llx",
+		(pid_t)arg->pid, arg->size, arg->buffer);
+
+	if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
+		arg->size = sched->bitmap_size;
+		return -ENOSPC;
+	}
+
+	bitmap = kzalloc(sched->bitmap_size, GFP_KERNEL);
+	if (!bitmap)
+		return -ENOMEM;
+
+	mutex_lock(&f->tsg_inuse_mutex);
+	for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
+		tsg = &f->tsg[tsgid];
+		if ((tsg->in_use) && (tsg->tgid == tgid))
+			NVGPU_SCHED_SET(tsgid, bitmap);
+	}
+	mutex_unlock(&f->tsg_inuse_mutex);
+
+	if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
+		bitmap, sched->bitmap_size))
+		err = -EFAULT;
+
+	kfree(bitmap);
+
+	return err;
+}
+
+static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched,
+	struct nvgpu_sched_tsg_get_params_args *arg)
+{
+	struct gk20a *g = sched->g;
+	struct fifo_gk20a *f = &g->fifo;
+	struct tsg_gk20a *tsg;
+	u32 tsgid = arg->tsgid;
+	int err = -ENXIO;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
+
+	if (tsgid >= f->num_channels)
+		return -EINVAL;
+
+	mutex_lock(&f->tsg_inuse_mutex);
+	tsg = &f->tsg[tsgid];
+	if (!tsg->in_use)
+		goto unlock_in_use;
+
+	mutex_lock(&sched->status_lock);
+	if (!NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) {
+		gk20a_dbg(gpu_dbg_sched, "tsgid=%u not active", tsgid);
+		goto unlock_status;
+	}
+
+	arg->pid = tsg->tgid;	/* kernel tgid corresponds to user pid */
+	arg->runlist_interleave = tsg->interleave_level;
+	arg->timeslice = tsg->timeslice_us;
+
+	if (tsg->tsg_gr_ctx) {
+		arg->graphics_preempt_mode =
+			tsg->tsg_gr_ctx->graphics_preempt_mode;
+		arg->compute_preempt_mode =
+			tsg->tsg_gr_ctx->compute_preempt_mode;
+	} else {
+		arg->graphics_preempt_mode = 0;
+		arg->compute_preempt_mode = 0;
+	}
+
+	err = 0;
+
+unlock_status:
+	mutex_unlock(&sched->status_lock);
+
+unlock_in_use:
+	mutex_unlock(&f->tsg_inuse_mutex);
+
+	return err;
+}
+
+static int gk20a_sched_dev_ioctl_tsg_set_timeslice(
+	struct gk20a_sched_ctrl *sched,
+	struct nvgpu_sched_tsg_timeslice_args *arg)
+{
+	struct gk20a *g = sched->g;
+	struct fifo_gk20a *f = &g->fifo;
+	struct tsg_gk20a *tsg;
+	u32 tsgid = arg->tsgid;
+	int err = -ENXIO;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
+
+	if (tsgid >= f->num_channels)
+		return -EINVAL;
+
+	mutex_lock(&f->tsg_inuse_mutex);
+	tsg = &f->tsg[tsgid];
+	if (!tsg->in_use)
+		goto unlock_in_use;
+
+	mutex_lock(&sched->status_lock);
+	if (NVGPU_SCHED_ISSET(tsgid, sched->recent_tsg_bitmap)) {
+		gk20a_dbg(gpu_dbg_sched, "tsgid=%u was re-allocated", tsgid);
+		goto unlock_status;
+	}
+
+	err = gk20a_busy(g->dev);
+	if (err)
+		goto unlock_status;
+
+	err = gk20a_tsg_set_timeslice(tsg, arg->timeslice);
+
+	gk20a_idle(g->dev);
+
+unlock_status:
+	mutex_unlock(&sched->status_lock);
+
+unlock_in_use:
+	mutex_unlock(&f->tsg_inuse_mutex);
+
+	return err;
+}
+
+static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(
+	struct gk20a_sched_ctrl *sched,
+	struct nvgpu_sched_tsg_runlist_interleave_args *arg)
+{
+	struct gk20a *g = sched->g;
+	struct fifo_gk20a *f = &g->fifo;
+	struct tsg_gk20a *tsg;
+	u32 tsgid = arg->tsgid;
+	int err = -ENXIO;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
+
+	if (tsgid >= f->num_channels)
+		return -EINVAL;
+
+	mutex_lock(&f->tsg_inuse_mutex);
+	tsg = &f->tsg[tsgid];
+	if (!tsg->in_use)
+		goto unlock_in_use;
+
+	mutex_lock(&sched->status_lock);
+	if (NVGPU_SCHED_ISSET(tsgid, sched->recent_tsg_bitmap)) {
+		gk20a_dbg(gpu_dbg_sched, "tsgid=%u was re-allocated", tsgid);
+		goto unlock_status;
+	}
+
+	err = gk20a_busy(g->dev);
+	if (err)
+		goto unlock_status;
+
+	err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave);
+
+	gk20a_idle(g->dev);
+
+unlock_status:
+	mutex_unlock(&sched->status_lock);
+
+unlock_in_use:
+	mutex_unlock(&f->tsg_inuse_mutex);
+
+	return err;
+}
+
+static int gk20a_sched_dev_ioctl_lock_control(struct gk20a_sched_ctrl *sched)
+{
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "");
+
+	mutex_lock(&sched->control_lock);
+	sched->control_locked = true;
+	mutex_unlock(&sched->control_lock);
+	return 0;
+}
+
+static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a_sched_ctrl *sched)
+{
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "");
+
+	mutex_lock(&sched->control_lock);
+	sched->control_locked = false;
+	mutex_unlock(&sched->control_lock);
+	return 0;
+}
+
+int gk20a_sched_dev_open(struct inode *inode, struct file *filp)
+{
+	struct gk20a *g = container_of(inode->i_cdev,
+		struct gk20a, sched.cdev);
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	int err;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "g=%p", g);
+
+	if (!sched->sw_ready) {
+		err = gk20a_busy(g->dev);
+		if (err)
+			return err;
+
+		gk20a_idle(g->dev);
+	}
+
+	if (!mutex_trylock(&sched->busy_lock))
+		return -EBUSY;
+
+	memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap,
+		sched->bitmap_size);
+
+	filp->private_data = sched;
+	gk20a_dbg(gpu_dbg_sched, "filp=%p sched=%p", filp, sched);
+
+	return 0;
+}
+
+long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd,
+	unsigned long arg)
+{
+	struct gk20a_sched_ctrl *sched = filp->private_data;
+	struct gk20a *g = sched->g;
+	u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
+	int err = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "nr=%d", _IOC_NR(cmd));
+
+	if ((_IOC_TYPE(cmd) != NVGPU_SCHED_IOCTL_MAGIC) ||
+		(_IOC_NR(cmd) == 0) ||
+		(_IOC_NR(cmd) > NVGPU_SCHED_IOCTL_LAST) ||
+		(_IOC_SIZE(cmd) > NVGPU_SCHED_IOCTL_MAX_ARG_SIZE))
+		return -EINVAL;
+
+	memset(buf, 0, sizeof(buf));
+	if (_IOC_DIR(cmd) & _IOC_WRITE) {
+		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
+			return -EFAULT;
+	}
+
+	switch (cmd) {
+	case NVGPU_SCHED_IOCTL_GET_TSGS:
+		err = gk20a_sched_dev_ioctl_get_tsgs(sched,
+			(struct nvgpu_sched_get_tsgs_args *)buf);
+		break;
+	case NVGPU_SCHED_IOCTL_GET_RECENT_TSGS:
+		err = gk20a_sched_dev_ioctl_get_recent_tsgs(sched,
+			(struct nvgpu_sched_get_tsgs_args *)buf);
+		break;
+	case NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID:
+		err = gk20a_sched_dev_ioctl_get_tsgs_by_pid(sched,
+			(struct nvgpu_sched_get_tsgs_by_pid_args *)buf);
+		break;
+	case NVGPU_SCHED_IOCTL_TSG_GET_PARAMS:
+		err = gk20a_sched_dev_ioctl_get_params(sched,
+			(struct nvgpu_sched_tsg_get_params_args *)buf);
+		break;
+	case NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE:
+		err = gk20a_sched_dev_ioctl_tsg_set_timeslice(sched,
+			(struct nvgpu_sched_tsg_timeslice_args *)buf);
+		break;
+	case NVGPU_SCHED_IOCTL_TSG_SET_RUNLIST_INTERLEAVE:
+		err = gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(sched,
+			(struct nvgpu_sched_tsg_runlist_interleave_args *)buf);
+		break;
+	case NVGPU_SCHED_IOCTL_LOCK_CONTROL:
+		err = gk20a_sched_dev_ioctl_lock_control(sched);
+		break;
+	case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL:
+		err = gk20a_sched_dev_ioctl_unlock_control(sched);
+		break;
+	default:
+		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
+			cmd);
+		err = -ENOTTY;
+	}
+
+	/* Some ioctls like NVGPU_SCHED_IOCTL_GET_TSGS might be called on
+	 * purpose with NULL buffer and/or zero size to discover TSG bitmap
+	 * size. We need to update user arguments in this case too, even
+	 * if we return an error.
+	 */
+	if ((!err || (err == -ENOSPC)) && (_IOC_DIR(cmd) & _IOC_READ)) {
+		if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
+			err = -EFAULT;
+	}
+
+	return err;
+}
+
+int gk20a_sched_dev_release(struct inode *inode, struct file *filp)
+{
+	struct gk20a_sched_ctrl *sched = filp->private_data;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched);
+
+	/* unlock control */
+	mutex_lock(&sched->control_lock);
+	sched->control_locked = false;
+	mutex_unlock(&sched->control_lock);
+
+	mutex_unlock(&sched->busy_lock);
+	return 0;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
+{
+	struct device *dev = s->private;
+	struct gk20a *g = gk20a_get_platform(dev)->g;
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+
+	int n = sched->bitmap_size / sizeof(u64);
+	int i;
+	int err;
+
+	err = gk20a_busy(g->dev);
+	if (err)
+		return err;
+
+	seq_printf(s, "control_locked=%d\n", sched->control_locked);
+	seq_printf(s, "busy=%d\n", mutex_is_locked(&sched->busy_lock));
+	seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);
+
+	mutex_lock(&sched->status_lock);
+
+	seq_puts(s, "active_tsg_bitmap\n");
+	for (i = 0; i < n; i++)
+		seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]);
+
+	seq_puts(s, "recent_tsg_bitmap\n");
+	for (i = 0; i < n; i++)
+		seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);
+
+	mutex_unlock(&sched->status_lock);
+
+	gk20a_idle(g->dev);
+
+	return 0;
+}
+
+static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations gk20a_sched_debugfs_fops = {
+	.open = gk20a_sched_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void gk20a_sched_debugfs_init(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+
+	debugfs_create_file("sched_ctrl", S_IRUGO, platform->debugfs,
+		dev, &gk20a_sched_debugfs_fops);
+}
+#endif /* CONFIG_DEBUG_FS */
+
+void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	int err;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
+
+	if (!sched->sw_ready) {
+		err = gk20a_busy(g->dev);
+		if (err) {
+			WARN_ON(err);
+			return;
+		}
+
+		gk20a_idle(g->dev);
+	}
+
+	mutex_lock(&sched->status_lock);
+	NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap);
+	NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap);
+	sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN;
+	mutex_unlock(&sched->status_lock);
+	wake_up_interruptible(&sched->readout_wq);
+}
+
+void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
+
+	mutex_lock(&sched->status_lock);
+	NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap);
+
+	/* clear recent_tsg_bitmap as well: if app manager did not
+	 * notice that TSG was previously added, no need to notify it
+	 * if the TSG has been released in the meantime. If the
+	 * TSG gets reallocated, app manager will be notified as usual.
+	 */
+	NVGPU_SCHED_CLR(tsg->tsgid, sched->recent_tsg_bitmap);
+
+	/* do not set event_pending, we only want to notify app manager
+	 * when TSGs are added, so that it can apply sched params
+	 */
+	mutex_unlock(&sched->status_lock);
+}
+
+int gk20a_sched_ctrl_init(struct gk20a *g)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	struct fifo_gk20a *f = &g->fifo;
+
+	if (sched->sw_ready)
+		return 0;
+
+	sched->g = g;
+	sched->bitmap_size = roundup(f->num_channels, 64) / 8;
+	sched->status = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "g=%p sched=%p size=%zu",
+		g, sched, sched->bitmap_size);
+
+	sched->active_tsg_bitmap = kzalloc(sched->bitmap_size, GFP_KERNEL);
+	if (!sched->active_tsg_bitmap)
+		goto fail_active;
+
+	sched->recent_tsg_bitmap = kzalloc(sched->bitmap_size, GFP_KERNEL);
+	if (!sched->recent_tsg_bitmap)
+		goto fail_recent;
+
+	init_waitqueue_head(&sched->readout_wq);
+	mutex_init(&sched->status_lock);
+	mutex_init(&sched->control_lock);
+	mutex_init(&sched->busy_lock);
+
+	sched->sw_ready = true;
+
+	return 0;
+
+fail_recent:
+	kfree(sched->active_tsg_bitmap);
+
+fail_active:
+	return -ENOMEM;
+}
+
+void gk20a_sched_ctrl_cleanup(struct gk20a *g)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+
+	kfree(sched->active_tsg_bitmap);
+	kfree(sched->recent_tsg_bitmap);
+	sched->active_tsg_bitmap = NULL;
+	sched->recent_tsg_bitmap = NULL;
+	sched->sw_ready = false;
+}
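[Editor's sketch] gk20a_sched_ctrl_init() above sizes each bitmap as roundup(num_channels, 64) / 8 bytes: the channel count is rounded up to a whole number of 64-bit words, then converted from bits to bytes. A mirror of that arithmetic in user-space C (the 512-channel figure is an example, not taken from this diff):

	#include <stdio.h>

	/* Round the channel count up to a multiple of 64 bits, then
	 * convert bits to bytes -- same result as roundup(n, 64) / 8. */
	static unsigned int tsg_bitmap_bytes(unsigned int num_channels)
	{
		return ((num_channels + 63) / 64) * 64 / 8;
	}

	int main(void)
	{
		/* e.g. 512 channels -> 512 bits -> 8 u64 words -> 64 bytes */
		printf("%u\n", tsg_bitmap_bytes(512));	/* prints 64 */
		return 0;
	}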
diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h
new file mode 100644
index 00000000..8f533056
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __SCHED_GK20A_H
+#define __SCHED_GK20A_H
+
+struct gk20a;
+struct gpu_ops;
+struct tsg_gk20a;
+
+struct gk20a_sched_ctrl {
+	struct gk20a *g;
+
+	struct mutex control_lock;
+	bool control_locked;
+	bool sw_ready;
+	struct mutex status_lock;
+	struct mutex busy_lock;
+
+	u64 status;
+
+	size_t bitmap_size;
+	u64 *active_tsg_bitmap;
+	u64 *recent_tsg_bitmap;
+
+	wait_queue_head_t readout_wq;
+};
+
+int gk20a_sched_dev_release(struct inode *inode, struct file *filp);
+int gk20a_sched_dev_open(struct inode *inode, struct file *filp);
+long gk20a_sched_dev_ioctl(struct file *, unsigned int, unsigned long);
+ssize_t gk20a_sched_dev_read(struct file *, char __user *, size_t, loff_t *);
+unsigned int gk20a_sched_dev_poll(struct file *, struct poll_table_struct *);
+
+void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *);
+void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *);
+int gk20a_sched_ctrl_init(struct gk20a *);
+
+void gk20a_sched_debugfs_init(struct device *dev);
+void gk20a_sched_ctrl_cleanup(struct gk20a *g);
+
+#endif /* __SCHED_GK20A_H */
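[Editor's sketch] Putting GET_TSGS_BY_PID and TSG_SET_TIMESLICE together: an app manager can retarget every TSG belonging to one process. Note from the handlers above that SET_TIMESLICE returns -ENXIO if the TSG has been freed, or re-allocated since the manager's last snapshot (the recent-bitmap check). Struct field names follow the driver code; the uapi header itself is not in this diff:

	#include <errno.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <sys/types.h>
	#include <linux/nvgpu.h>	/* assumed uapi location */

	static int set_pid_timeslice(int fd, pid_t pid, uint64_t *bitmap,
			uint32_t bitmap_bytes, uint32_t timeslice_us)
	{
		struct nvgpu_sched_get_tsgs_by_pid_args by_pid = {
			.pid = pid,
			.size = bitmap_bytes,
			.buffer = (uintptr_t)bitmap,
		};
		uint32_t tsgid;

		if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID, &by_pid) < 0)
			return -errno;

		for (tsgid = 0; tsgid < bitmap_bytes * 8; tsgid++) {
			struct nvgpu_sched_tsg_timeslice_args ts = {
				.tsgid = tsgid,
				.timeslice = timeslice_us,
			};

			if (!(bitmap[tsgid / 64] & (1ULL << (tsgid % 64))))
				continue;
			/* -ENXIO: TSG freed or re-allocated since the query */
			if (ioctl(fd, NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE, &ts) < 0)
				return -errno;
		}
		return 0;
	}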
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index 0fa93da9..af8f0f7b 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -338,7 +338,7 @@ static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg,
 	return err;
 }
 
-static int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
+int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
 {
 	struct gk20a *g = tsg->g;
 	int ret;
@@ -349,6 +349,8 @@ static int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
 	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH:
 		ret = g->ops.fifo.set_runlist_interleave(g, tsg->tsgid,
 							true, 0, level);
+		if (!ret)
+			tsg->interleave_level = level;
 		break;
 	default:
 		ret = -EINVAL;
@@ -358,7 +360,7 @@ static int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
 	return ret ? ret : g->ops.fifo.update_runlist(g, tsg->runlist_id, ~0, true, true);
 }
 
-static int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
+int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
 {
 	struct gk20a *g = tsg->g;
 
@@ -369,6 +371,8 @@ static int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
 	gk20a_channel_get_timescale_from_timeslice(g, timeslice,
 			&tsg->timeslice_timeout, &tsg->timeslice_scale);
 
+	tsg->timeslice_us = timeslice;
+
 	return g->ops.fifo.update_runlist(g, tsg->runlist_id, ~0, true, true);
 }
 
@@ -421,11 +425,14 @@ int gk20a_tsg_open(struct gk20a *g, struct file *filp)
 	tsg->timeslice_timeout = 0;
 	tsg->timeslice_scale = 0;
 	tsg->runlist_id = ~0;
+	tsg->tgid = current->tgid;
 
 	filp->private_data = tsg;
 
 	gk20a_dbg(gpu_dbg_fn, "tsg opened %d\n", tsg->tsgid);
 
+	gk20a_sched_ctrl_tsg_added(g, tsg);
+
 	return 0;
 }
 
@@ -456,6 +463,7 @@ static void gk20a_tsg_release(struct kref *ref)
 		tsg->vm = NULL;
 	}
 
+	gk20a_sched_ctrl_tsg_removed(g, tsg);
 	release_used_tsg(&g->fifo, tsg);
 	tsg->runlist_id = ~0;
 
@@ -470,6 +478,81 @@ int gk20a_tsg_dev_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static int gk20a_tsg_ioctl_set_priority(struct gk20a *g,
+	struct tsg_gk20a *tsg, struct nvgpu_set_priority_args *arg)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	int err;
+
+	mutex_lock(&sched->control_lock);
+	if (sched->control_locked) {
+		err = -EPERM;
+		goto done;
+	}
+
+	err = gk20a_busy(g->dev);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "failed to power on gpu");
+		goto done;
+	}
+
+	err = gk20a_tsg_set_priority(g, tsg, arg->priority);
+
+	gk20a_idle(g->dev);
+done:
+	mutex_unlock(&sched->control_lock);
+	return err;
+}
+
+static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g,
+	struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	int err;
+
+	mutex_lock(&sched->control_lock);
+	if (sched->control_locked) {
+		err = -EPERM;
+		goto done;
+	}
+	err = gk20a_busy(g->dev);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "failed to power on gpu");
+		goto done;
+	}
+
+	err = gk20a_tsg_set_runlist_interleave(tsg, arg->level);
+
+	gk20a_idle(g->dev);
+done:
+	mutex_unlock(&sched->control_lock);
+	return err;
+}
+
+static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g,
+	struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	int err;
+
+	mutex_lock(&sched->control_lock);
+	if (sched->control_locked) {
+		err = -EPERM;
+		goto done;
+	}
+	err = gk20a_busy(g->dev);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "failed to power on gpu");
+		goto done;
+	}
+	err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us);
+	gk20a_idle(g->dev);
+done:
+	mutex_unlock(&sched->control_lock);
+	return err;
+}
+
+
 long gk20a_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
 	unsigned long arg)
 {
@@ -561,8 +644,8 @@ long gk20a_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
 
 	case NVGPU_IOCTL_TSG_SET_PRIORITY:
 	{
-		err = gk20a_tsg_set_priority(g, tsg,
-			((struct nvgpu_set_priority_args *)buf)->priority);
+		err = gk20a_tsg_ioctl_set_priority(g, tsg,
+			(struct nvgpu_set_priority_args *)buf);
 		break;
 	}
 
@@ -574,30 +657,14 @@ long gk20a_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
 	}
 
 	case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE:
-	{
-		err = gk20a_busy(g->dev);
-		if (err) {
-			gk20a_err(dev_from_gk20a(g),
-			   "failed to host gk20a for ioctl cmd: 0x%x", cmd);
-			return err;
-		}
-		err = gk20a_tsg_set_runlist_interleave(tsg,
-			((struct nvgpu_runlist_interleave_args *)buf)->level);
-		gk20a_idle(g->dev);
+		err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg,
+			(struct nvgpu_runlist_interleave_args *)buf);
 		break;
-	}
 
 	case NVGPU_IOCTL_TSG_SET_TIMESLICE:
 	{
-		err = gk20a_busy(g->dev);
-		if (err) {
-			gk20a_err(dev_from_gk20a(g),
-			   "failed to host gk20a for ioctl cmd: 0x%x", cmd);
-			return err;
-		}
-		err = g->ops.fifo.tsg_set_timeslice(tsg,
-			((struct nvgpu_timeslice_args *)buf)->timeslice_us);
-		gk20a_idle(g->dev);
+		err = gk20a_tsg_ioctl_set_timeslice(g, tsg,
+			(struct nvgpu_timeslice_args *)buf);
 		break;
 	}
 
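[Editor's sketch] The wrappers above enforce the exclusion protocol: while the app manager holds LOCK_CONTROL, applications calling NVGPU_IOCTL_TSG_SET_PRIORITY/SET_TIMESLICE/SET_RUNLIST_INTERLEAVE on their own TSG fds get -EPERM, while the sched-node ioctls remain usable; release of the sched fd also drops the lock (see gk20a_sched_dev_release). From the manager's side that could look like:

	#include <errno.h>
	#include <sys/ioctl.h>
	#include <linux/nvgpu.h>	/* assumed uapi location */

	static int apply_timeslice_locked(int sched_fd,
			struct nvgpu_sched_tsg_timeslice_args *ts)
	{
		int err = 0;

		if (ioctl(sched_fd, NVGPU_SCHED_IOCTL_LOCK_CONTROL) < 0)
			return -errno;

		/* Until UNLOCK_CONTROL, application-side TSG scheduling
		 * ioctls fail with -EPERM; this path stays available. */
		if (ioctl(sched_fd, NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE, ts) < 0)
			err = -errno;

		ioctl(sched_fd, NVGPU_SCHED_IOCTL_UNLOCK_CONTROL);
		return err;
	}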
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index 57414690..2819dd1c 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -56,6 +56,7 @@ struct tsg_gk20a {
 	struct mutex event_id_list_lock;
 
 	u32 runlist_id;
+	pid_t tgid;
 };
 
 int gk20a_enable_tsg(struct tsg_gk20a *tsg);
@@ -66,5 +67,8 @@ int gk20a_tsg_unbind_channel(struct channel_gk20a *ch);
 
 void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg,
 		int event_id);
 
+int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level);
+int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice);
+
 #endif /* __TSG_GK20A_H_ */
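[Editor's sketch] The tgid field added above is what makes "pid at user level corresponds to kernel tgid" work: every thread of a process shares the tgid (the value getpid() returns), while gettid() is per-thread. So GET_TSGS_BY_PID finds TSGs no matter which thread of the target process opened them. A small demonstration:

	#define _GNU_SOURCE
	#include <pthread.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	static void *show_ids(void *unused)
	{
		(void)unused;
		/* same pid (tgid) in both threads, different tid */
		printf("pid(tgid)=%d tid=%ld\n", getpid(),
			(long)syscall(SYS_gettid));
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		show_ids(NULL);
		pthread_create(&t, NULL, show_ids, NULL);
		pthread_join(t, NULL);
		return 0;
	}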
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index dc7c4320..a00d52de 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -427,6 +427,7 @@ int vgpu_pm_finalize_poweron(struct device *dev)
 	}
 
 	gk20a_ctxsw_trace_init(g);
+	gk20a_sched_ctrl_init(g);
 	gk20a_channel_resume(g);
 
 done:
@@ -600,6 +601,7 @@ int vgpu_remove(struct platform_device *pdev)
 		g->remove_support(dev);
 
 	vgpu_comm_deinit();
+	gk20a_sched_ctrl_cleanup(g);
 	gk20a_user_deinit(dev, &nvgpu_class);
 	gk20a_get_platform(dev)->g = NULL;
 	kfree(g);