From c8ffe0fdecfa110a9f9beb1b7e0298d3c3c64cc2 Mon Sep 17 00:00:00 2001
From: Thomas Fleury
Date: Tue, 10 May 2016 09:05:45 -0700
Subject: gpu: nvgpu: add sched control API

Added a dedicated device node to allow an app manager to control TSG
scheduling parameters:
- Get list of TSGs
- Get list of recent TSGs
- Get list of TSGs per pid
- Get TSG current scheduling parameters
- Set TSG timeslice
- Set TSG runlist interleave

Jira VFND-1586

Change-Id: I014c9d1534bce0eaea6c25ad114cf0cff317af79
Signed-off-by: Thomas Fleury
Reviewed-on: http://git-master/r/1160384
(cherry picked from commit 75ca739517cc7f7f76714b5f6a1a57c39b8cb38e)
Reviewed-on: http://git-master/r/1167021
Reviewed-by: Richard Zhao
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman
Reviewed-by: Vijayakumar Subbu
---
 drivers/gpu/nvgpu/Makefile                  |   1 +
 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c |  11 +-
 drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c  |   7 +-
 drivers/gpu/nvgpu/gk20a/gk20a.c             |  32 ++
 drivers/gpu/nvgpu/gk20a/gk20a.h             |   9 +
 drivers/gpu/nvgpu/gk20a/sched_gk20a.c       | 603 ++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/sched_gk20a.h       |  52 +++
 drivers/gpu/nvgpu/gk20a/tsg_gk20a.c         | 115 ++++--
 drivers/gpu/nvgpu/gk20a/tsg_gk20a.h         |   4 +
 drivers/gpu/nvgpu/vgpu/vgpu.c               |   2 +
 10 files changed, 801 insertions(+), 35 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gk20a/sched_gk20a.c
 create mode 100644 drivers/gpu/nvgpu/gk20a/sched_gk20a.h

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 1bc2b9cc..0fdd2e28 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_GK20A) := nvgpu.o
 
 nvgpu-y := \
 	gk20a/gk20a.o \
+	gk20a/sched_gk20a.o \
 	gk20a/as_gk20a.o \
 	gk20a/ctrl_gk20a.o \
 	gk20a/ce2_gk20a.o \
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index d435bf79..d43c06be 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -677,22 +677,13 @@ void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
 		.vmid = 0,
 		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
 		.context_id = 0,
-		.pid = 0,
+		.pid = tsg->tgid,
 	};
-	struct channel_gk20a *ch;
 
 	if (!g->ctxsw_trace)
 		return;
 
 	g->ops.read_ptimer(g, &entry.timestamp);
-	mutex_lock(&tsg->ch_list_lock);
-	if (!list_empty(&tsg->ch_list)) {
-		ch = list_entry(tsg->ch_list.next,
-			struct channel_gk20a, ch_entry);
-		entry.pid = ch->pid;
-	}
-	mutex_unlock(&tsg->ch_list_lock);
-
 	gk20a_ctxsw_trace_write(g, &entry);
 	gk20a_ctxsw_trace_wake_up(g, 0);
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 69e2b409..15e645f2 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -596,6 +596,7 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 	struct gk20a_fecs_trace *trace = g->fecs_trace;
 	struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
 	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch);
+	pid_t pid;
 
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
 			"hw_chid=%d context_ptr=%x inst_block=%llx",
@@ -630,7 +631,11 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 			GK20A_FECS_TRACE_NUM_RECORDS));
 
 	gk20a_mem_end(g, mem);
 
-	gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid);
+	if (gk20a_is_channel_marked_as_tsg(ch))
+		pid = tsg_gk20a_from_ch(ch)->tgid;
+	else
+		pid = ch->pid;
+	gk20a_fecs_trace_hash_add(g, context_ptr, pid);
 
 	return 0;
 }
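[Editor's sketch] The ioctls listed in the commit message map onto a simple user-space flow. The sketch below shows how an app manager might discover the TSG bitmap size, then fetch the active-TSG bitmap. The device node path is an assumption derived from the "-sched" suffix registered in gk20a.c below; the NVGPU_SCHED_IOCTL_* numbers and argument structs live in the uapi header that pairs with this patch and are not shown in this diff.

	#include <errno.h>
	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/nvgpu.h>	/* assumed location of NVGPU_SCHED_IOCTL_* */

	int main(void)
	{
		struct nvgpu_sched_get_tsgs_args args = { 0 };
		uint64_t *bitmap;
		uint32_t i, n;
		int fd = open("/dev/nvhost-sched-gpu", O_RDWR);	/* assumed node name */

		if (fd < 0)
			return 1;

		/* Pass no buffer: the driver replies -ENOSPC but copies back
		 * args with args.size set to the bitmap size (see the ioctl
		 * dispatch comment in sched_gk20a.c below). */
		if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args) < 0 &&
		    errno != ENOSPC) {
			perror("GET_TSGS (size query)");
			return 1;
		}

		bitmap = calloc(1, args.size);
		args.buffer = (uintptr_t)bitmap;
		if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args) < 0) {
			perror("GET_TSGS");
			return 1;
		}

		n = args.size * 8;	/* one bit per TSG id */
		for (i = 0; i < n; i++)
			if (bitmap[i / 64] & (1ULL << (i % 64)))
				printf("TSG %u is active\n", i);

		free(bitmap);
		close(fd);
		return 0;
	}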
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 9255c847..822cd3ff 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -246,6 +246,18 @@ static const struct file_operations gk20a_ctxsw_ops = {
 	.mmap = gk20a_ctxsw_dev_mmap,
 };
 
+static const struct file_operations gk20a_sched_ops = {
+	.owner = THIS_MODULE,
+	.release = gk20a_sched_dev_release,
+	.open = gk20a_sched_dev_open,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = gk20a_sched_dev_ioctl,
+#endif
+	.unlocked_ioctl = gk20a_sched_dev_ioctl,
+	.poll = gk20a_sched_dev_poll,
+	.read = gk20a_sched_dev_read,
+};
+
 static inline void sim_writel(struct gk20a *g, u32 r, u32 v)
 {
 	writel(v, g->sim.regs+r);
@@ -965,6 +977,12 @@ int gk20a_pm_finalize_poweron(struct device *dev)
 	if (err)
 		gk20a_warn(dev, "could not initialize ctxsw tracing");
 
+	err = gk20a_sched_ctrl_init(g);
+	if (err) {
+		gk20a_err(dev, "failed to init sched control");
+		goto done;
+	}
+
 	/* Restore the debug setting */
 	g->ops.mm.set_debug_mode(g, g->mmu_debug_ctrl);
 
@@ -1101,6 +1119,11 @@ void gk20a_user_deinit(struct device *dev, struct class *class)
 		cdev_del(&g->ctxsw.cdev);
 	}
 
+	if (g->sched.node) {
+		device_destroy(&nvgpu_class, g->sched.cdev.dev);
+		cdev_del(&g->sched.cdev);
+	}
+
 	if (g->cdev_region)
 		unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS);
 }
@@ -1170,6 +1193,12 @@ int gk20a_user_init(struct device *dev, const char *interface_name,
 			goto fail;
 #endif
 
+	err = gk20a_create_device(dev, devno++, interface_name, "-sched",
+				  &g->sched.cdev, &g->sched.node,
+				  &gk20a_sched_ops,
+				  class);
+	if (err)
+		goto fail;
 
 	return 0;
 fail:
@@ -1632,6 +1661,7 @@ static int gk20a_probe(struct platform_device *dev)
 		gk20a_alloc_debugfs_init(dev);
 		gk20a_mm_debugfs_init(&dev->dev);
 		gk20a_fifo_debugfs_init(&dev->dev);
+		gk20a_sched_debugfs_init(&dev->dev);
 #endif
 
 	gk20a_init_gr(gk20a);
@@ -1655,6 +1685,8 @@ static int __exit gk20a_remove(struct platform_device *pdev)
 
 	gk20a_ctxsw_trace_cleanup(g);
 
+	gk20a_sched_ctrl_cleanup(g);
+
 	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
 		gk20a_scale_exit(dev);
 
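[Editor's sketch] The fops wired above give the node event semantics: read() blocks until sched->status becomes non-zero and clears it, and poll() reports POLLIN while an event is pending (see gk20a_sched_dev_read/poll in sched_gk20a.c below). A minimal event loop, under the same uapi assumptions as the previous sketch:

	#include <poll.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <linux/nvgpu.h>	/* assumed: nvgpu_sched_event_arg, status bits */

	static void wait_for_tsg_events(int fd)
	{
		struct nvgpu_sched_event_arg event;	/* .reserved, .status */
		struct pollfd pfd = { .fd = fd, .events = POLLIN };

		for (;;) {
			if (poll(&pfd, 1, -1) < 0)
				break;
			/* On success the driver returns sizeof(event). */
			if (read(fd, &event, sizeof(event)) != sizeof(event))
				break;
			if (event.status & NVGPU_SCHED_STATUS_TSG_OPEN)
				printf("new TSG(s); query GET_RECENT_TSGS\n");
		}
	}

Opening the fd with O_NONBLOCK instead makes read() return -EAGAIN when no event is pending, which suits a select/poll-driven manager.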
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index c5da68cc..8aa8689b 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -52,6 +52,7 @@ struct acr_desc;
 #include "acr.h"
 #include "cde_gk20a.h"
 #include "debug_gk20a.h"
+#include "sched_gk20a.h"
 
 /* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds.
    32 ns is the resolution of ptimer. */
@@ -817,6 +818,11 @@ struct gk20a {
 		struct device *node;
 	} ctxsw;
 
+	struct {
+		struct cdev cdev;
+		struct device *node;
+	} sched;
+
 	struct mutex client_lock;
 	int client_refcount; /* open channels and ctrl nodes */
 
@@ -847,6 +853,8 @@ struct gk20a {
 	struct gk20a_ctxsw_trace *ctxsw_trace;
 	struct gk20a_fecs_trace *fecs_trace;
 
+	struct gk20a_sched_ctrl sched_ctrl;
+
 	struct device_dma_parameters dma_parms;
 
 	struct gk20a_cde_app cde_app;
@@ -925,6 +933,7 @@ enum gk20a_dbg_categories {
 	gpu_dbg_cde = BIT(10), /* cde info messages */
 	gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */
 	gpu_dbg_ctxsw = BIT(12), /* ctxsw tracing */
+	gpu_dbg_sched = BIT(13), /* sched control tracing */
 	gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
 };
 
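[Editor's sketch] The new sched code below leans on NVGPU_SCHED_SET/ISSET/CLR helpers that come from the uapi header, not from this diff. Given that the bitmaps are arrays of u64 indexed by TSG id, they presumably reduce to bit operations along these lines (an assumption, not the header's actual text):

	#define NVGPU_SCHED_SET(tsgid, bitmap) \
		((bitmap)[(tsgid) / 64] |= (1ULL << ((tsgid) % 64)))
	#define NVGPU_SCHED_CLR(tsgid, bitmap) \
		((bitmap)[(tsgid) / 64] &= ~(1ULL << ((tsgid) % 64)))
	#define NVGPU_SCHED_ISSET(tsgid, bitmap) \
		((bitmap)[(tsgid) / 64] & (1ULL << ((tsgid) % 64)))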
diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
new file mode 100644
index 00000000..bcbbbe8b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
@@ -0,0 +1,603 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/wait.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <uapi/linux/nvgpu.h>
+#include "ctxsw_trace_gk20a.h"
+#include "gk20a.h"
+#include "gr_gk20a.h"
+#include "hw_ctxsw_prog_gk20a.h"
+#include "hw_gr_gk20a.h"
+#include "sched_gk20a.h"
+
+ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf,
+	size_t size, loff_t *off)
+{
+	struct gk20a_sched_ctrl *sched = filp->private_data;
+	struct nvgpu_sched_event_arg event = { 0 };
+	int err;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched,
+		"filp=%p buf=%p size=%zu", filp, buf, size);
+
+	if (size < sizeof(event))
+		return -EINVAL;
+	size = sizeof(event);
+
+	mutex_lock(&sched->status_lock);
+	while (!sched->status) {
+		mutex_unlock(&sched->status_lock);
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+		err = wait_event_interruptible(sched->readout_wq,
+			sched->status);
+		if (err)
+			return err;
+		mutex_lock(&sched->status_lock);
+	}
+
+	event.reserved = 0;
+	event.status = sched->status;
+
+	if (copy_to_user(buf, &event, size)) {
+		mutex_unlock(&sched->status_lock);
+		return -EFAULT;
+	}
+
+	sched->status = 0;
+
+	mutex_unlock(&sched->status_lock);
+
+	return size;
+}
+
+unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait)
+{
+	struct gk20a_sched_ctrl *sched = filp->private_data;
+	unsigned int mask = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "");
+
+	mutex_lock(&sched->status_lock);
+	poll_wait(filp, &sched->readout_wq, wait);
+	if (sched->status)
+		mask |= POLLIN | POLLRDNORM;
+	mutex_unlock(&sched->status_lock);
+
+	return mask;
+}
+
+static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a_sched_ctrl *sched,
+	struct nvgpu_sched_get_tsgs_args *arg)
+{
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx",
+		arg->size, arg->buffer);
+
+	if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
+		arg->size = sched->bitmap_size;
+		return -ENOSPC;
+	}
+
+	mutex_lock(&sched->status_lock);
+	if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
+		sched->active_tsg_bitmap, sched->bitmap_size)) {
+		mutex_unlock(&sched->status_lock);
+		return -EFAULT;
+	}
+
+	memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size);
+	mutex_unlock(&sched->status_lock);
+
+	return 0;
+}
+
+static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a_sched_ctrl *sched,
+	struct nvgpu_sched_get_tsgs_args *arg)
+{
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx",
+		arg->size, arg->buffer);
+
+	if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
+		arg->size = sched->bitmap_size;
+		return -ENOSPC;
+	}
+
+	mutex_lock(&sched->status_lock);
+	if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
+		sched->recent_tsg_bitmap, sched->bitmap_size)) {
+		mutex_unlock(&sched->status_lock);
+		return -EFAULT;
+	}
+
+	memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size);
+	mutex_unlock(&sched->status_lock);
+
+	return 0;
+}
+
+static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched,
+	struct nvgpu_sched_get_tsgs_by_pid_args *arg)
+{
+	struct fifo_gk20a *f = &sched->g->fifo;
+	struct tsg_gk20a *tsg;
+	u64 *bitmap;
+	int tsgid;
+	/* pid at user level corresponds to kernel tgid */
+	pid_t tgid = (pid_t)arg->pid;
+	int err = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "pid=%d size=%u buffer=%llx",
+		(pid_t)arg->pid, arg->size, arg->buffer);
+
+	if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
+		arg->size = sched->bitmap_size;
+		return -ENOSPC;
+	}
+
+	bitmap = kzalloc(sched->bitmap_size, GFP_KERNEL);
+	if (!bitmap)
+		return -ENOMEM;
+
+	mutex_lock(&f->tsg_inuse_mutex);
+	for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
+		tsg = &f->tsg[tsgid];
+		if ((tsg->in_use) && (tsg->tgid == tgid))
+			NVGPU_SCHED_SET(tsgid, bitmap);
+	}
+	mutex_unlock(&f->tsg_inuse_mutex);
+
+	if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
+		bitmap, sched->bitmap_size))
+		err = -EFAULT;
+
+	kfree(bitmap);
+
+	return err;
+}
+
+static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched,
+	struct nvgpu_sched_tsg_get_params_args *arg)
+{
+	struct gk20a *g = sched->g;
+	struct fifo_gk20a *f = &g->fifo;
+	struct tsg_gk20a *tsg;
+	u32 tsgid = arg->tsgid;
+	int err = -ENXIO;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
+
+	if (tsgid >= f->num_channels)
+		return -EINVAL;
+
+	mutex_lock(&f->tsg_inuse_mutex);
+	tsg = &f->tsg[tsgid];
+	if (!tsg->in_use)
+		goto unlock_in_use;
+
+	mutex_lock(&sched->status_lock);
+	if (!NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) {
+		gk20a_dbg(gpu_dbg_sched, "tsgid=%u not active", tsgid);
+		goto unlock_status;
+	}
+
+	arg->pid = tsg->tgid;	/* kernel tgid corresponds to user pid */
+	arg->runlist_interleave = tsg->interleave_level;
+	arg->timeslice = tsg->timeslice_us;
+
+	if (tsg->tsg_gr_ctx) {
+		arg->graphics_preempt_mode =
+			tsg->tsg_gr_ctx->graphics_preempt_mode;
+		arg->compute_preempt_mode =
+			tsg->tsg_gr_ctx->compute_preempt_mode;
+	} else {
+		arg->graphics_preempt_mode = 0;
+		arg->compute_preempt_mode = 0;
+	}
+
+	err = 0;
+
+unlock_status:
+	mutex_unlock(&sched->status_lock);
+
+unlock_in_use:
+	mutex_unlock(&f->tsg_inuse_mutex);
+
+	return err;
+}
+
+static int gk20a_sched_dev_ioctl_tsg_set_timeslice(
+	struct gk20a_sched_ctrl *sched,
+	struct nvgpu_sched_tsg_timeslice_args *arg)
+{
+	struct gk20a *g = sched->g;
+	struct fifo_gk20a *f = &g->fifo;
+	struct tsg_gk20a *tsg;
+	u32 tsgid = arg->tsgid;
+	int err = -ENXIO;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
+
+	if (tsgid >= f->num_channels)
+		return -EINVAL;
+
+	mutex_lock(&f->tsg_inuse_mutex);
+	tsg = &f->tsg[tsgid];
+	if (!tsg->in_use)
+		goto unlock_in_use;
+
+	mutex_lock(&sched->status_lock);
+	if (NVGPU_SCHED_ISSET(tsgid, sched->recent_tsg_bitmap)) {
+		gk20a_dbg(gpu_dbg_sched, "tsgid=%u was re-allocated", tsgid);
+		goto unlock_status;
+	}
+
+	err = gk20a_busy(g->dev);
+	if (err)
+		goto unlock_status;
+
+	err = gk20a_tsg_set_timeslice(tsg, arg->timeslice);
+
+	gk20a_idle(g->dev);
+
+unlock_status:
+	mutex_unlock(&sched->status_lock);
+
+unlock_in_use:
+	mutex_unlock(&f->tsg_inuse_mutex);
+
+	return err;
+}
+
+static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(
+	struct gk20a_sched_ctrl *sched,
+	struct nvgpu_sched_tsg_runlist_interleave_args *arg)
+{
+	struct gk20a *g = sched->g;
+	struct fifo_gk20a *f = &g->fifo;
+	struct tsg_gk20a *tsg;
+	u32 tsgid = arg->tsgid;
+	int err = -ENXIO;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
+
+	if (tsgid >= f->num_channels)
+		return -EINVAL;
+
+	mutex_lock(&f->tsg_inuse_mutex);
+	tsg = &f->tsg[tsgid];
+	if (!tsg->in_use)
+		goto unlock_in_use;
+
+	mutex_lock(&sched->status_lock);
+	if (NVGPU_SCHED_ISSET(tsgid, sched->recent_tsg_bitmap)) {
+		gk20a_dbg(gpu_dbg_sched, "tsgid=%u was re-allocated", tsgid);
+		goto unlock_status;
+	}
+
+	err = gk20a_busy(g->dev);
+	if (err)
+		goto unlock_status;
+
+	err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave);
+
+	gk20a_idle(g->dev);
+
+unlock_status:
+	mutex_unlock(&sched->status_lock);
+
+unlock_in_use:
+	mutex_unlock(&f->tsg_inuse_mutex);
+
+	return err;
+}
+
+static int gk20a_sched_dev_ioctl_lock_control(struct gk20a_sched_ctrl *sched)
+{
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "");
+
+	mutex_lock(&sched->control_lock);
+	sched->control_locked = true;
+	mutex_unlock(&sched->control_lock);
+	return 0;
+}
+
+static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a_sched_ctrl *sched)
+{
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "");
+
+	mutex_lock(&sched->control_lock);
+	sched->control_locked = false;
+	mutex_unlock(&sched->control_lock);
+	return 0;
+}
+
+int gk20a_sched_dev_open(struct inode *inode, struct file *filp)
+{
+	struct gk20a *g = container_of(inode->i_cdev,
+		struct gk20a, sched.cdev);
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	int err;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "g=%p", g);
+
+	if (!sched->sw_ready) {
+		err = gk20a_busy(g->dev);
+		if (err)
+			return err;
+
+		gk20a_idle(g->dev);
+	}
+
+	if (!mutex_trylock(&sched->busy_lock))
+		return -EBUSY;
+
+	memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap,
+		sched->bitmap_size);
+
+	filp->private_data = sched;
+	gk20a_dbg(gpu_dbg_sched, "filp=%p sched=%p", filp, sched);
+
+	return 0;
+}
+
+long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd,
+	unsigned long arg)
+{
+	struct gk20a_sched_ctrl *sched = filp->private_data;
+	struct gk20a *g = sched->g;
+	u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
+	int err = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "nr=%d", _IOC_NR(cmd));
+
+	if ((_IOC_TYPE(cmd) != NVGPU_SCHED_IOCTL_MAGIC) ||
+		(_IOC_NR(cmd) == 0) ||
+		(_IOC_NR(cmd) > NVGPU_SCHED_IOCTL_LAST) ||
+		(_IOC_SIZE(cmd) > NVGPU_SCHED_IOCTL_MAX_ARG_SIZE))
+		return -EINVAL;
+
+	memset(buf, 0, sizeof(buf));
+	if (_IOC_DIR(cmd) & _IOC_WRITE) {
+		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
+			return -EFAULT;
+	}
+
+	switch (cmd) {
+	case NVGPU_SCHED_IOCTL_GET_TSGS:
+		err = gk20a_sched_dev_ioctl_get_tsgs(sched,
+			(struct nvgpu_sched_get_tsgs_args *)buf);
+		break;
+	case NVGPU_SCHED_IOCTL_GET_RECENT_TSGS:
+		err = gk20a_sched_dev_ioctl_get_recent_tsgs(sched,
+			(struct nvgpu_sched_get_tsgs_args *)buf);
+		break;
+	case NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID:
+		err = gk20a_sched_dev_ioctl_get_tsgs_by_pid(sched,
+			(struct nvgpu_sched_get_tsgs_by_pid_args *)buf);
+		break;
+	case NVGPU_SCHED_IOCTL_TSG_GET_PARAMS:
+		err = gk20a_sched_dev_ioctl_get_params(sched,
+			(struct nvgpu_sched_tsg_get_params_args *)buf);
+		break;
+	case NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE:
+		err = gk20a_sched_dev_ioctl_tsg_set_timeslice(sched,
+			(struct nvgpu_sched_tsg_timeslice_args *)buf);
+		break;
+	case NVGPU_SCHED_IOCTL_TSG_SET_RUNLIST_INTERLEAVE:
+		err = gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(sched,
+			(struct nvgpu_sched_tsg_runlist_interleave_args *)buf);
+		break;
+	case NVGPU_SCHED_IOCTL_LOCK_CONTROL:
+		err = gk20a_sched_dev_ioctl_lock_control(sched);
+		break;
+	case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL:
+		err = gk20a_sched_dev_ioctl_unlock_control(sched);
+		break;
+	default:
+		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
+			cmd);
+		err = -ENOTTY;
+	}
+
+	/* Some ioctls like NVGPU_SCHED_IOCTL_GET_TSGS might be called on
+	 * purpose with NULL buffer and/or zero size to discover TSG bitmap
+	 * size. We need to update user arguments in this case too, even
+	 * if we return an error.
+	 */
+	if ((!err || (err == -ENOSPC)) && (_IOC_DIR(cmd) & _IOC_READ)) {
+		if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
+			err = -EFAULT;
+	}
+
+	return err;
+}
+
+int gk20a_sched_dev_release(struct inode *inode, struct file *filp)
+{
+	struct gk20a_sched_ctrl *sched = filp->private_data;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched);
+
+	/* unlock control */
+	mutex_lock(&sched->control_lock);
+	sched->control_locked = false;
+	mutex_unlock(&sched->control_lock);
+
+	mutex_unlock(&sched->busy_lock);
+	return 0;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
+{
+	struct device *dev = s->private;
+	struct gk20a *g = gk20a_get_platform(dev)->g;
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+
+	int n = sched->bitmap_size / sizeof(u64);
+	int i;
+	int err;
+
+	err = gk20a_busy(g->dev);
+	if (err)
+		return err;
+
+	seq_printf(s, "control_locked=%d\n", sched->control_locked);
+	seq_printf(s, "busy=%d\n", mutex_is_locked(&sched->busy_lock));
+	seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);
+
+	mutex_lock(&sched->status_lock);
+
+	seq_puts(s, "active_tsg_bitmap\n");
+	for (i = 0; i < n; i++)
+		seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]);
+
+	seq_puts(s, "recent_tsg_bitmap\n");
+	for (i = 0; i < n; i++)
+		seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);
+
+	mutex_unlock(&sched->status_lock);
+
+	gk20a_idle(g->dev);
+
+	return 0;
+}
+
+static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations gk20a_sched_debugfs_fops = {
+	.open = gk20a_sched_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void gk20a_sched_debugfs_init(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+
+	debugfs_create_file("sched_ctrl", S_IRUGO, platform->debugfs,
+		dev, &gk20a_sched_debugfs_fops);
+}
+#endif /* CONFIG_DEBUG_FS */
+
+void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	int err;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
+
+	if (!sched->sw_ready) {
+		err = gk20a_busy(g->dev);
+		if (err) {
+			WARN_ON(err);
+			return;
+		}
+
+		gk20a_idle(g->dev);
+	}
+
+	mutex_lock(&sched->status_lock);
+	NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap);
+	NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap);
+	sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN;
+	mutex_unlock(&sched->status_lock);
+	wake_up_interruptible(&sched->readout_wq);
+}
+
+void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
+
+	mutex_lock(&sched->status_lock);
+	NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap);
+
+	/* clear recent_tsg_bitmap as well: if app manager did not
+	 * notice that TSG was previously added, no need to notify it
+	 * if the TSG has been released in the meantime. If the
+	 * TSG gets reallocated, app manager will be notified as usual.
+	 */
+	NVGPU_SCHED_CLR(tsg->tsgid, sched->recent_tsg_bitmap);
+
+	/* do not set event_pending, we only want to notify app manager
+	 * when TSGs are added, so that it can apply sched params
+	 */
+	mutex_unlock(&sched->status_lock);
+}
+
+int gk20a_sched_ctrl_init(struct gk20a *g)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	struct fifo_gk20a *f = &g->fifo;
+
+	if (sched->sw_ready)
+		return 0;
+
+	sched->g = g;
+	sched->bitmap_size = roundup(f->num_channels, 64) / 8;
+	sched->status = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "g=%p sched=%p size=%zu",
+		g, sched, sched->bitmap_size);
+
+	sched->active_tsg_bitmap = kzalloc(sched->bitmap_size, GFP_KERNEL);
+	if (!sched->active_tsg_bitmap)
+		goto fail_active;
+
+	sched->recent_tsg_bitmap = kzalloc(sched->bitmap_size, GFP_KERNEL);
+	if (!sched->recent_tsg_bitmap)
+		goto fail_recent;
+
+	init_waitqueue_head(&sched->readout_wq);
+	mutex_init(&sched->status_lock);
+	mutex_init(&sched->control_lock);
+	mutex_init(&sched->busy_lock);
+
+	sched->sw_ready = true;
+
+	return 0;
+
+fail_recent:
+	kfree(sched->active_tsg_bitmap);
+
+fail_active:
+	return -ENOMEM;
+}
+
+void gk20a_sched_ctrl_cleanup(struct gk20a *g)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+
+	kfree(sched->active_tsg_bitmap);
+	kfree(sched->recent_tsg_bitmap);
+	sched->active_tsg_bitmap = NULL;
+	sched->recent_tsg_bitmap = NULL;
+	sched->sw_ready = false;
+}
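[Editor's sketch] gk20a_sched_ctrl_init() above sizes each bitmap as roundup(num_channels, 64) / 8 bytes: the channel count is rounded up to a whole number of 64-bit words, then converted from bits to bytes. A mirror of that arithmetic in user-space C (the 512-channel figure is an example, not taken from this diff):

	#include <stdio.h>

	/* Round the channel count up to a multiple of 64 bits, then
	 * convert bits to bytes -- same result as roundup(n, 64) / 8. */
	static unsigned int tsg_bitmap_bytes(unsigned int num_channels)
	{
		return ((num_channels + 63) / 64) * 64 / 8;
	}

	int main(void)
	{
		/* e.g. 512 channels -> 512 bits -> 8 u64 words -> 64 bytes */
		printf("%u\n", tsg_bitmap_bytes(512));	/* prints 64 */
		return 0;
	}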
diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h
new file mode 100644
index 00000000..8f533056
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __SCHED_GK20A_H
+#define __SCHED_GK20A_H
+
+struct gk20a;
+struct gpu_ops;
+struct tsg_gk20a;
+
+struct gk20a_sched_ctrl {
+	struct gk20a *g;
+
+	struct mutex control_lock;
+	bool control_locked;
+	bool sw_ready;
+	struct mutex status_lock;
+	struct mutex busy_lock;
+
+	u64 status;
+
+	size_t bitmap_size;
+	u64 *active_tsg_bitmap;
+	u64 *recent_tsg_bitmap;
+
+	wait_queue_head_t readout_wq;
+};
+
+int gk20a_sched_dev_release(struct inode *inode, struct file *filp);
+int gk20a_sched_dev_open(struct inode *inode, struct file *filp);
+long gk20a_sched_dev_ioctl(struct file *, unsigned int, unsigned long);
+ssize_t gk20a_sched_dev_read(struct file *, char __user *, size_t, loff_t *);
+unsigned int gk20a_sched_dev_poll(struct file *, struct poll_table_struct *);
+
+void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *);
+void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *);
+int gk20a_sched_ctrl_init(struct gk20a *);
+
+void gk20a_sched_debugfs_init(struct device *dev);
+void gk20a_sched_ctrl_cleanup(struct gk20a *g);
+
+#endif /* __SCHED_GK20A_H */
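[Editor's sketch] Putting GET_TSGS_BY_PID and TSG_SET_TIMESLICE together: an app manager can retarget every TSG belonging to one process. Note from the handlers above that SET_TIMESLICE returns -ENXIO if the TSG has been freed, or re-allocated since the manager's last snapshot (the recent-bitmap check). Struct field names follow the driver code; the uapi header itself is not in this diff:

	#include <errno.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <sys/types.h>
	#include <linux/nvgpu.h>	/* assumed uapi location */

	static int set_pid_timeslice(int fd, pid_t pid, uint64_t *bitmap,
			uint32_t bitmap_bytes, uint32_t timeslice_us)
	{
		struct nvgpu_sched_get_tsgs_by_pid_args by_pid = {
			.pid = pid,
			.size = bitmap_bytes,
			.buffer = (uintptr_t)bitmap,
		};
		uint32_t tsgid;

		if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID, &by_pid) < 0)
			return -errno;

		for (tsgid = 0; tsgid < bitmap_bytes * 8; tsgid++) {
			struct nvgpu_sched_tsg_timeslice_args ts = {
				.tsgid = tsgid,
				.timeslice = timeslice_us,
			};

			if (!(bitmap[tsgid / 64] & (1ULL << (tsgid % 64))))
				continue;
			/* -ENXIO: TSG freed or re-allocated since the query */
			if (ioctl(fd, NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE, &ts) < 0)
				return -errno;
		}
		return 0;
	}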
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index 0fa93da9..af8f0f7b 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -338,7 +338,7 @@ static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg,
 	return err;
 }
 
-static int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
+int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
 {
 	struct gk20a *g = tsg->g;
 	int ret;
@@ -349,6 +349,8 @@ static int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
 	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH:
 		ret = g->ops.fifo.set_runlist_interleave(g, tsg->tsgid,
 							true, 0, level);
+		if (!ret)
+			tsg->interleave_level = level;
 		break;
 	default:
 		ret = -EINVAL;
@@ -358,7 +360,7 @@ static int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
 	return ret ? ret : g->ops.fifo.update_runlist(g, tsg->runlist_id, ~0, true, true);
 }
 
-static int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
+int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
 {
 	struct gk20a *g = tsg->g;
 
@@ -369,6 +371,8 @@ static int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
 	gk20a_channel_get_timescale_from_timeslice(g, timeslice,
 			&tsg->timeslice_timeout, &tsg->timeslice_scale);
 
+	tsg->timeslice_us = timeslice;
+
 	return g->ops.fifo.update_runlist(g, tsg->runlist_id, ~0, true, true);
 }
 
@@ -421,11 +425,14 @@ int gk20a_tsg_open(struct gk20a *g, struct file *filp)
 	tsg->timeslice_timeout = 0;
 	tsg->timeslice_scale = 0;
 	tsg->runlist_id = ~0;
+	tsg->tgid = current->tgid;
 
 	filp->private_data = tsg;
 
 	gk20a_dbg(gpu_dbg_fn, "tsg opened %d\n", tsg->tsgid);
 
+	gk20a_sched_ctrl_tsg_added(g, tsg);
+
 	return 0;
 }
 
@@ -456,6 +463,7 @@ static void gk20a_tsg_release(struct kref *ref)
 		tsg->vm = NULL;
 	}
 
+	gk20a_sched_ctrl_tsg_removed(g, tsg);
 	release_used_tsg(&g->fifo, tsg);
 	tsg->runlist_id = ~0;
 
@@ -470,6 +478,81 @@ int gk20a_tsg_dev_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static int gk20a_tsg_ioctl_set_priority(struct gk20a *g,
+	struct tsg_gk20a *tsg, struct nvgpu_set_priority_args *arg)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	int err;
+
+	mutex_lock(&sched->control_lock);
+	if (sched->control_locked) {
+		err = -EPERM;
+		goto done;
+	}
+
+	err = gk20a_busy(g->dev);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "failed to power on gpu");
+		goto done;
+	}
+
+	err = gk20a_tsg_set_priority(g, tsg, arg->priority);
+
+	gk20a_idle(g->dev);
+done:
+	mutex_unlock(&sched->control_lock);
+	return err;
+}
+
+static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g,
+	struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	int err;
+
+	mutex_lock(&sched->control_lock);
+	if (sched->control_locked) {
+		err = -EPERM;
+		goto done;
+	}
+	err = gk20a_busy(g->dev);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "failed to power on gpu");
+		goto done;
+	}
+
+	err = gk20a_tsg_set_runlist_interleave(tsg, arg->level);
+
+	gk20a_idle(g->dev);
+done:
+	mutex_unlock(&sched->control_lock);
+	return err;
+}
+
+static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g,
+	struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg)
+{
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	int err;
+
+	mutex_lock(&sched->control_lock);
+	if (sched->control_locked) {
+		err = -EPERM;
+		goto done;
+	}
+	err = gk20a_busy(g->dev);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "failed to power on gpu");
+		goto done;
+	}
+	err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us);
+	gk20a_idle(g->dev);
+done:
+	mutex_unlock(&sched->control_lock);
+	return err;
+}
+
+
 long gk20a_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
 	unsigned long arg)
 {
@@ -561,8 +644,8 @@ long gk20a_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
 
 	case NVGPU_IOCTL_TSG_SET_PRIORITY:
 	{
-		err = gk20a_tsg_set_priority(g, tsg,
-			((struct nvgpu_set_priority_args *)buf)->priority);
+		err = gk20a_tsg_ioctl_set_priority(g, tsg,
+			(struct nvgpu_set_priority_args *)buf);
 		break;
 	}
 
@@ -574,30 +657,14 @@ long gk20a_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
 	}
 
 	case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE:
-	{
-		err = gk20a_busy(g->dev);
-		if (err) {
-			gk20a_err(dev_from_gk20a(g),
-			   "failed to host gk20a for ioctl cmd: 0x%x", cmd);
-			return err;
-		}
-		err = gk20a_tsg_set_runlist_interleave(tsg,
-			((struct nvgpu_runlist_interleave_args *)buf)->level);
-		gk20a_idle(g->dev);
+		err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg,
+			(struct nvgpu_runlist_interleave_args *)buf);
 		break;
-	}
 
 	case NVGPU_IOCTL_TSG_SET_TIMESLICE:
 	{
-		err = gk20a_busy(g->dev);
-		if (err) {
-			gk20a_err(dev_from_gk20a(g),
-			   "failed to host gk20a for ioctl cmd: 0x%x", cmd);
-			return err;
-		}
-		err = g->ops.fifo.tsg_set_timeslice(tsg,
-			((struct nvgpu_timeslice_args *)buf)->timeslice_us);
-		gk20a_idle(g->dev);
+		err = gk20a_tsg_ioctl_set_timeslice(g, tsg,
+			(struct nvgpu_timeslice_args *)buf);
 		break;
 	}
 
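[Editor's sketch] The wrappers above enforce the exclusion protocol: while the app manager holds LOCK_CONTROL, applications calling NVGPU_IOCTL_TSG_SET_PRIORITY/SET_TIMESLICE/SET_RUNLIST_INTERLEAVE on their own TSG fds get -EPERM, while the sched-node ioctls remain usable; release of the sched fd also drops the lock (see gk20a_sched_dev_release). From the manager's side that could look like:

	#include <errno.h>
	#include <sys/ioctl.h>
	#include <linux/nvgpu.h>	/* assumed uapi location */

	static int apply_timeslice_locked(int sched_fd,
			struct nvgpu_sched_tsg_timeslice_args *ts)
	{
		int err = 0;

		if (ioctl(sched_fd, NVGPU_SCHED_IOCTL_LOCK_CONTROL) < 0)
			return -errno;

		/* Until UNLOCK_CONTROL, application-side TSG scheduling
		 * ioctls fail with -EPERM; this path stays available. */
		if (ioctl(sched_fd, NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE, ts) < 0)
			err = -errno;

		ioctl(sched_fd, NVGPU_SCHED_IOCTL_UNLOCK_CONTROL);
		return err;
	}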
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index 57414690..2819dd1c 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -56,6 +56,7 @@ struct tsg_gk20a {
 	struct mutex event_id_list_lock;
 
 	u32 runlist_id;
+	pid_t tgid;
 };
 
 int gk20a_enable_tsg(struct tsg_gk20a *tsg);
@@ -66,5 +67,8 @@ int gk20a_tsg_unbind_channel(struct channel_gk20a *ch);
 
 void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg,
 		int event_id);
 
+int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level);
+int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice);
+
 #endif /* __TSG_GK20A_H_ */
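[Editor's sketch] The tgid field added above is what makes "pid at user level corresponds to kernel tgid" work: every thread of a process shares the tgid (the value getpid() returns), while gettid() is per-thread. So GET_TSGS_BY_PID finds TSGs no matter which thread of the target process opened them. A small demonstration:

	#define _GNU_SOURCE
	#include <pthread.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	static void *show_ids(void *unused)
	{
		(void)unused;
		/* same pid (tgid) in both threads, different tid */
		printf("pid(tgid)=%d tid=%ld\n", getpid(),
			(long)syscall(SYS_gettid));
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		show_ids(NULL);
		pthread_create(&t, NULL, show_ids, NULL);
		pthread_join(t, NULL);
		return 0;
	}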
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index dc7c4320..a00d52de 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -427,6 +427,7 @@ int vgpu_pm_finalize_poweron(struct device *dev)
 	}
 
 	gk20a_ctxsw_trace_init(g);
+	gk20a_sched_ctrl_init(g);
 	gk20a_channel_resume(g);
 
 done:
@@ -600,6 +601,7 @@ int vgpu_remove(struct platform_device *pdev)
 		g->remove_support(dev);
 
 	vgpu_comm_deinit();
+	gk20a_sched_ctrl_cleanup(g);
 	gk20a_user_deinit(dev, &nvgpu_class);
 	gk20a_get_platform(dev)->g = NULL;
 	kfree(g);