path: root/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c
Diffstat (limited to 'drivers/gpu/nvgpu/os/linux/ctxsw_trace.c')
-rw-r--r--  drivers/gpu/nvgpu/os/linux/ctxsw_trace.c | 730
1 file changed, 730 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c
new file mode 100644
index 00000000..a335988a
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c
@@ -0,0 +1,730 @@
/*
 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/wait.h>
#include <linux/ktime.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <trace/events/gk20a.h>
#include <uapi/linux/nvgpu.h>

#include "gk20a/gk20a.h"
#include "gk20a/gr_gk20a.h"

#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/atomic.h>
#include <nvgpu/barrier.h>

#include "platform_gk20a.h"
#include "os_linux.h"
#include "ctxsw_trace.h"

#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>

#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE	(128*PAGE_SIZE)

/* Userland-facing FIFO (one global + eventually one per VM) */
struct gk20a_ctxsw_dev {
	struct gk20a *g;

	struct nvgpu_ctxsw_ring_header *hdr;
	struct nvgpu_ctxsw_trace_entry *ents;
	struct nvgpu_ctxsw_trace_filter filter;
	bool write_enabled;
	struct nvgpu_cond readout_wq;
	size_t size;
	u32 num_ents;

	nvgpu_atomic_t vma_ref;

	struct nvgpu_mutex write_lock;
};


struct gk20a_ctxsw_trace {
	struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
};

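/*
 * Ring buffer helpers. The header and the entry array live in a single
 * buffer that is shared with userspace (consumed via read() or mmap()).
 * The kernel producer advances write_idx under write_lock; the consumer
 * advances read_idx. One slot is intentionally left unused so that the
 * "full" and "empty" states can be distinguished (see ring_is_full()).
 */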
static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx == hdr->read_idx);
}

static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
{
	return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
}

static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
}

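/*
 * read() consumes whole trace entries from the ring. With O_NONBLOCK it
 * returns -EAGAIN on an empty ring; otherwise it sleeps on readout_wq until
 * the producer signals new data (see gk20a_ctxsw_trace_wake_up()).
 */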
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
	loff_t *off)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	struct nvgpu_ctxsw_trace_entry __user *entry =
		(struct nvgpu_ctxsw_trace_entry *) buf;
	size_t copied = 0;
	int err;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
		"filp=%p buf=%p size=%zu", filp, buf, size);

	nvgpu_mutex_acquire(&dev->write_lock);
	while (ring_is_empty(hdr)) {
		nvgpu_mutex_release(&dev->write_lock);
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
			!ring_is_empty(hdr), 0);
		if (err)
			return err;
		nvgpu_mutex_acquire(&dev->write_lock);
	}

	while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
		if (ring_is_empty(hdr))
			break;

		if (copy_to_user(entry, &dev->ents[hdr->read_idx],
			sizeof(*entry))) {
			nvgpu_mutex_release(&dev->write_lock);
			return -EFAULT;
		}

		hdr->read_idx++;
		if (hdr->read_idx >= hdr->num_ents)
			hdr->read_idx = 0;

		entry++;
		copied += sizeof(*entry);
		size -= sizeof(*entry);
	}

	nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
		hdr->read_idx);

	*off = hdr->read_idx;
	nvgpu_mutex_release(&dev->write_lock);

	return copied;
}

static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
{
	struct gk20a *g = dev->g;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
	nvgpu_mutex_acquire(&dev->write_lock);
	dev->write_enabled = true;
	nvgpu_mutex_release(&dev->write_lock);
	dev->g->ops.fecs_trace.enable(dev->g);
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
{
	struct gk20a *g = dev->g;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
	dev->g->ops.fecs_trace.disable(dev->g);
	nvgpu_mutex_acquire(&dev->write_lock);
	dev->write_enabled = false;
	nvgpu_mutex_release(&dev->write_lock);
	return 0;
}

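/*
 * Allocate (or replace) the shared ring for this device. The allocation is
 * delegated to the per-chip fecs_trace.alloc_user_buffer op, which is
 * expected to return a buffer starting with an initialized
 * nvgpu_ctxsw_ring_header (gk20a_ctxsw_dev_ring_alloc() below is one such
 * implementation). Refused while writes are enabled or a mapping exists.
 */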
static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
	size_t size)
{
	struct gk20a *g = dev->g;
	void *buf;
	int err;

	if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
		return -EBUSY;

	err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
	if (err)
		return err;

	dev->hdr = buf;
	dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
	dev->size = size;
	dev->num_ents = dev->hdr->num_ents;

	nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
		dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
	return 0;
}

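/*
 * Default ring allocator: a page-rounded vmalloc_user() area laid out as
 * [header][entry 0 .. num_ents-1], so it can later be handed to userspace
 * with remap_vmalloc_range() (see gk20a_ctxsw_dev_mmap_buffer()).
 */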
int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
		void **buf, size_t *size)
{
	struct nvgpu_ctxsw_ring_header *hdr;

	*size = roundup(*size, PAGE_SIZE);
	hdr = vmalloc_user(*size);
	if (!hdr)
		return -ENOMEM;

	hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
	hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
	hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
		/ sizeof(struct nvgpu_ctxsw_trace_entry);
	hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
	hdr->drop_count = 0;
	hdr->read_idx = 0;
	hdr->write_idx = 0;
	hdr->write_seqno = 0;

	*buf = hdr;
	return 0;
}

int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
{
	struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];

	nvgpu_vfree(g, dev->hdr);
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_ring_setup_args *args)
{
	struct gk20a *g = dev->g;
	size_t size = args->size;
	int ret;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);

	if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
		return -EINVAL;

	nvgpu_mutex_acquire(&dev->write_lock);
	ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
	nvgpu_mutex_release(&dev->write_lock);

	return ret;
}

static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_trace_filter_args *args)
{
	struct gk20a *g = dev->g;

	nvgpu_mutex_acquire(&dev->write_lock);
	dev->filter = args->filter;
	nvgpu_mutex_release(&dev->write_lock);

	if (g->ops.fecs_trace.set_filter)
		g->ops.fecs_trace.set_filter(g, &dev->filter);
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_trace_filter_args *args)
{
	nvgpu_mutex_acquire(&dev->write_lock);
	args->filter = dev->filter;
	nvgpu_mutex_release(&dev->write_lock);

	return 0;
}

static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
{
	struct gk20a *g = dev->g;
	int err;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");

	err = gk20a_busy(g);
	if (err)
		return err;

	if (g->ops.fecs_trace.flush)
		err = g->ops.fecs_trace.flush(g);

	if (likely(!err))
		err = g->ops.fecs_trace.poll(g);

	gk20a_idle(g);
	return err;
}

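/*
 * open() takes a GPU reference and power ref, enforces CAP_SYS_ADMIN and a
 * single user per device, and sizes the default ring from the maximum
 * number of entries the FECS tracer can generate with an all-pass filter.
 */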
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
{
	struct nvgpu_os_linux *l;
	struct gk20a *g;
	struct gk20a_ctxsw_trace *trace;
	struct gk20a_ctxsw_dev *dev;
	int err;
	size_t size;
	u32 n;

	/* only one VM for now */
	const int vmid = 0;

	l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
	g = gk20a_get(&l->g);
	if (!g)
		return -ENODEV;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);

	if (!capable(CAP_SYS_ADMIN)) {
		err = -EPERM;
		goto free_ref;
	}

	err = gk20a_busy(g);
	if (err)
		goto free_ref;

	trace = g->ctxsw_trace;
	if (!trace) {
		err = -ENODEV;
		goto idle;
	}

	/* Allow only one user for this device */
	dev = &trace->devs[vmid];
	nvgpu_mutex_acquire(&dev->write_lock);
	if (dev->hdr) {
		err = -EBUSY;
		goto done;
	}

	/* By default, allocate ring buffer big enough to accommodate
	 * FECS records with default event filter */

	/* enable all traces by default */
	NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);

	/* compute max number of entries generated with this filter */
	n = g->ops.fecs_trace.max_entries(g, &dev->filter);

	size = sizeof(struct nvgpu_ctxsw_ring_header) +
			n * sizeof(struct nvgpu_ctxsw_trace_entry);
	nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
		size, n, sizeof(struct nvgpu_ctxsw_trace_entry));

	err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
	if (!err) {
		filp->private_data = dev;
		nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
			filp, dev, size);
	}

done:
	nvgpu_mutex_release(&dev->write_lock);

idle:
	gk20a_idle(g);
free_ref:
	if (err)
		gk20a_put(g);
	return err;
}

int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);

	g->ops.fecs_trace.disable(g);

	nvgpu_mutex_acquire(&dev->write_lock);
	dev->write_enabled = false;
	nvgpu_mutex_release(&dev->write_lock);

	if (dev->hdr) {
		dev->g->ops.fecs_trace.free_user_buffer(dev->g);
		dev->hdr = NULL;
	}
	gk20a_put(g);
	return 0;
}

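/*
 * ioctl() dispatcher. Arguments are staged through a bounce buffer:
 * _IOC_WRITE payloads are copied in before the switch, _IOC_READ payloads
 * are copied back out afterwards. A typical (illustrative) userspace
 * sequence would be:
 *
 *   fd = open(<ctxsw device node>, O_RDONLY);
 *   ioctl(fd, NVGPU_CTXSW_IOCTL_SET_FILTER, &filter_args);   // optional
 *   ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_ENABLE);
 *   ... poll(fd) / read(fd), or mmap() the ring ...
 *   ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_DISABLE);
 *
 * The device node name is defined by the nvgpu cdev setup, not in this file.
 */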
long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
	unsigned long arg)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
	int err = 0;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
		(_IOC_NR(cmd) == 0) ||
		(_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
		(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
		return -EINVAL;

	memset(buf, 0, sizeof(buf));
	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	switch (cmd) {
	case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
		err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
		break;
	case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
		err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
		break;
	case NVGPU_CTXSW_IOCTL_RING_SETUP:
		err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
			(struct nvgpu_ctxsw_ring_setup_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_SET_FILTER:
		err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
			(struct nvgpu_ctxsw_trace_filter_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_GET_FILTER:
		err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
			(struct nvgpu_ctxsw_trace_filter_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_POLL:
		err = gk20a_ctxsw_dev_ioctl_poll(dev);
		break;
	default:
		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
			cmd);
		err = -ENOTTY;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) {
		if (copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)))
			err = -EFAULT;
	}

	return err;
}

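/*
 * poll()/select() support: readers are woken through readout_wq and POLLIN
 * is reported whenever the ring holds at least one entry.
 */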
unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	unsigned int mask = 0;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");

	nvgpu_mutex_acquire(&dev->write_lock);
	poll_wait(filp, &dev->readout_wq.wq, wait);
	if (!ring_is_empty(hdr))
		mask |= POLLIN | POLLRDNORM;
	nvgpu_mutex_release(&dev->write_lock);

	return mask;
}

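/*
 * vma_ref tracks live userspace mappings of the ring so that
 * gk20a_ctxsw_dev_alloc_buffer() can refuse to swap the buffer out from
 * under an existing mmap().
 */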
static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
	struct gk20a *g = dev->g;

	nvgpu_atomic_inc(&dev->vma_ref);
	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		nvgpu_atomic_read(&dev->vma_ref));
}

static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
	struct gk20a *g = dev->g;

	nvgpu_atomic_dec(&dev->vma_ref);
	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		nvgpu_atomic_read(&dev->vma_ref));
}

static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
	.open = gk20a_ctxsw_dev_vma_open,
	.close = gk20a_ctxsw_dev_vma_close,
};

int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
				struct vm_area_struct *vma)
{
	return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
}

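/*
 * mmap() maps the shared ring into the caller's address space through the
 * per-chip mmap_user_buffer op and installs the vma ops above so the
 * mapping is accounted in vma_ref.
 */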
int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	int ret;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
		vma->vm_start, vma->vm_end);

	ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
	if (likely(!ret)) {
		vma->vm_private_data = dev;
		vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
		vma->vm_ops->open(vma);
	}

	return ret;
}

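/*
 * One gk20a_ctxsw_dev is initialized per supported VM (currently a single
 * global one); each gets its own wait queue and write lock, with no ring
 * allocated yet (hdr == NULL).
 */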
#ifdef CONFIG_GK20A_CTXSW_TRACE
static int gk20a_ctxsw_init_devs(struct gk20a *g)
{
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	struct gk20a_ctxsw_dev *dev = trace->devs;
	int err;
	int i;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		dev->g = g;
		dev->hdr = NULL;
		dev->write_enabled = false;
		nvgpu_cond_init(&dev->readout_wq);
		err = nvgpu_mutex_init(&dev->write_lock);
		if (err)
			return err;
		nvgpu_atomic_set(&dev->vma_ref, 0);
		dev++;
	}
	return 0;
}
#endif

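/*
 * Driver-level init/cleanup. Tracing is optional: when the chip does not
 * provide a fecs_trace.init op, the whole subsystem is silently skipped.
 */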
int gk20a_ctxsw_trace_init(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	int err;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);

	/* if tracing is not supported, skip this */
	if (!g->ops.fecs_trace.init)
		return 0;

	if (likely(trace))
		return 0;

	trace = nvgpu_kzalloc(g, sizeof(*trace));
	if (unlikely(!trace))
		return -ENOMEM;
	g->ctxsw_trace = trace;

	err = gk20a_ctxsw_init_devs(g);
	if (err)
		goto fail;

	err = g->ops.fecs_trace.init(g);
	if (unlikely(err))
		goto fail;

	return 0;

fail:
	memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
	nvgpu_kfree(g, trace);
	g->ctxsw_trace = NULL;
	return err;
#else
	return 0;
#endif
}

void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace;
	struct gk20a_ctxsw_dev *dev;
	int i;

	if (!g->ctxsw_trace)
		return;

	trace = g->ctxsw_trace;
	dev = trace->devs;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		nvgpu_mutex_destroy(&dev->write_lock);
		dev++;
	}

	nvgpu_kfree(g, g->ctxsw_trace);
	g->ctxsw_trace = NULL;

	g->ops.fecs_trace.deinit(g);
#endif
}

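/*
 * Producer side: inserts one entry into the per-VM ring (presumably called
 * by the FECS trace code for each hardware record). Entries are dropped and
 * accounted in drop_count when writes are disabled or the ring is full; an
 * out-of-range write_idx is treated as corruption and disables tracing. The
 * write barrier below keeps a reader from observing write_idx ahead of the
 * entry data.
 */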
int gk20a_ctxsw_trace_write(struct gk20a *g,
		struct nvgpu_ctxsw_trace_entry *entry)
{
	struct nvgpu_ctxsw_ring_header *hdr;
	struct gk20a_ctxsw_dev *dev;
	int ret = 0;
	const char *reason;
	u32 write_idx;

	if (!g->ctxsw_trace)
		return 0;

	if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
		return -ENODEV;

	dev = &g->ctxsw_trace->devs[entry->vmid];
	hdr = dev->hdr;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"dev=%p hdr=%p", dev, hdr);

	nvgpu_mutex_acquire(&dev->write_lock);

	if (unlikely(!hdr)) {
		/* device has been released */
		ret = -ENODEV;
		goto done;
	}

	write_idx = hdr->write_idx;
	if (write_idx >= dev->num_ents) {
		nvgpu_err(dev->g,
			"write_idx=%u out of range [0..%u]",
			write_idx, dev->num_ents);
		ret = -ENOSPC;
		reason = "write_idx out of range";
		goto disable;
	}

	entry->seqno = hdr->write_seqno++;

	if (!dev->write_enabled) {
		ret = -EBUSY;
		reason = "write disabled";
		goto drop;
	}

	if (unlikely(ring_is_full(hdr))) {
		ret = -ENOSPC;
		reason = "user fifo full";
		goto drop;
	}

	if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
		reason = "filtered out";
		goto filter;
	}

	nvgpu_log(g, gpu_dbg_ctxsw,
		"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp);

	dev->ents[write_idx] = *entry;

	/* ensure record is written before updating write index */
	nvgpu_smp_wmb();

	write_idx++;
	if (unlikely(write_idx >= hdr->num_ents))
		write_idx = 0;
	hdr->write_idx = write_idx;
	nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
		hdr->read_idx, hdr->write_idx, ring_len(hdr));

	nvgpu_mutex_release(&dev->write_lock);
	return ret;

disable:
	g->ops.fecs_trace.disable(g);

drop:
	hdr->drop_count++;

filter:
	nvgpu_log(g, gpu_dbg_ctxsw,
		"dropping seqno=%d context_id=%08x pid=%lld "
		"tag=%x time=%llx (%s)",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp, reason);

done:
	nvgpu_mutex_release(&dev->write_lock);
	return ret;
}

void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
{
	struct gk20a_ctxsw_dev *dev;

	if (!g->ctxsw_trace)
		return;

	dev = &g->ctxsw_trace->devs[vmid];
	nvgpu_cond_signal_interruptible(&dev->readout_wq);
}

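/*
 * Engine reset notifications are injected as synthetic trace entries
 * (tag NVGPU_CTXSW_TAG_ENGINE_RESET) so a tracing client can correlate
 * resets with the surrounding context-switch records.
 */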
void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct nvgpu_ctxsw_trace_entry entry = {
		.vmid = 0,
		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
		.context_id = 0,
		.pid = ch->tgid,
	};

	if (!g->ctxsw_trace)
		return;

	g->ops.ptimer.read_ptimer(g, &entry.timestamp);
	gk20a_ctxsw_trace_write(g, &entry);
	gk20a_ctxsw_trace_wake_up(g, 0);
#endif
	trace_gk20a_channel_reset(ch->chid, ch->tsgid);
}

void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct nvgpu_ctxsw_trace_entry entry = {
		.vmid = 0,
		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
		.context_id = 0,
		.pid = tsg->tgid,
	};

	if (!g->ctxsw_trace)
		return;

	g->ops.ptimer.read_ptimer(g, &entry.timestamp);
	gk20a_ctxsw_trace_write(g, &entry);
	gk20a_ctxsw_trace_wake_up(g, 0);
#endif
	trace_gk20a_channel_reset(~0, tsg->tsgid);
}