1 files changed, 720 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c
new file mode 100644
index 00000000..8268bf60
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c
@@ -0,0 +1,720 @@
+/*
+ * Copyright (c) 2016-2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/wait.h>
+#include <linux/ktime.h>
+#include <linux/uaccess.h>
+#include <linux/poll.h>
+#include <trace/events/gk20a.h>
+#include <uapi/linux/nvgpu.h>
+#include "gk20a/gk20a.h"
+#include "gk20a/gr_gk20a.h"
+#include <nvgpu/kmem.h>
+#include <nvgpu/log.h>
+#include <nvgpu/atomic.h>
+#include <nvgpu/barrier.h>
+#include "platform_gk20a.h"
+#include "os_linux.h"
+#include "ctxsw_trace.h"
+#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
+#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
+#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE      (128*PAGE_SIZE) /* largest ring size accepted by the RING_SETUP ioctl */
+/* Userland-facing FIFO (one global + eventually one per VM).
+ * Holds the ring buffer that trace entries are written into and that
+ * user space drains through read() or maps through mmap(). */
+struct gk20a_ctxsw_dev {
+        struct gk20a *g; /* GPU this device belongs to; set at init */
+        struct nvgpu_ctxsw_ring_header *hdr; /* start of the ring buffer; NULL while no buffer is allocated */
+        struct nvgpu_ctxsw_trace_entry *ents; /* first entry, located right after *hdr */
+        struct nvgpu_ctxsw_trace_filter filter; /* tags accepted by gk20a_ctxsw_trace_write() */
+        bool write_enabled; /* toggled by the TRACE_ENABLE/TRACE_DISABLE ioctls; writes are dropped when false */
+        struct nvgpu_cond readout_wq; /* readers wait here for entries; signalled by gk20a_ctxsw_trace_wake_up() */
+        size_t size; /* buffer size reported by the alloc_user_buffer hook */
+        u32 num_ents; /* copy of hdr->num_ents taken when the buffer was allocated */
+        nvgpu_atomic_t vma_ref; /* incremented on VMA open, decremented on VMA close */
+        struct nvgpu_mutex write_lock; /* taken around ring, filter and write_enabled updates */
+};
+struct gk20a_ctxsw_trace { /* per-GPU trace state, stored in g->ctxsw_trace */
+        struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS]; /* indexed by vmid */
+};
+/* Non-zero when the read and write indices coincide, i.e. there is
+ * nothing left for the reader to consume. */
+static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
+{
+        return hdr->read_idx == hdr->write_idx;
+}
+/* Non-zero when advancing the write index by one slot (modulo the ring
+ * size) would land on the read index; one slot is always kept unused. */
+static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
+{
+        return hdr->read_idx == ((hdr->write_idx + 1) % hdr->num_ents);
+}
+/* Number of entries currently queued between the read and write indices.
+ * The difference is biased by num_ents before the modulo so that a write
+ * index that has wrapped behind the read index does not underflow; a bare
+ * (write_idx - read_idx) % num_ents is only right for power-of-two sizes.
+ * Both indices are expected to be below num_ents. */
+static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
+{
+        return (hdr->write_idx + hdr->num_ents - hdr->read_idx) %
+                hdr->num_ents;
+}
+/*
+ * read() handler: copy whole trace entries from the ring to user space.
+ * While the ring is empty the caller waits on readout_wq, unless the file
+ * has O_NONBLOCK set, in which case -EAGAIN is returned. Returns the number
+ * of bytes copied, -EFAULT if a copy to user space fails, or the error
+ * reported by the wait. On success *off is set to the ring's read index.
+ */
+ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
+        loff_t *off)
+{
+        struct gk20a_ctxsw_dev *dev = filp->private_data;
+        struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
+        struct nvgpu_ctxsw_trace_entry __user *dst =
+                (struct nvgpu_ctxsw_trace_entry *) buf;
+        size_t nbytes = 0;
+        ssize_t ret;
+        int err;
+        gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
+                "filp=%p buf=%p size=%zu", filp, buf, size);
+        /* leave this loop with write_lock held and a non-empty ring */
+        for (;;) {
+                nvgpu_mutex_acquire(&dev->write_lock);
+                if (!ring_is_empty(hdr))
+                        break;
+                /* never sleep with the lock held */
+                nvgpu_mutex_release(&dev->write_lock);
+                if (filp->f_flags & O_NONBLOCK)
+                        return -EAGAIN;
+                err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
+                        !ring_is_empty(hdr), 0);
+                if (err)
+                        return err;
+        }
+        /* drain entries while the user buffer still has room for one */
+        while (size >= sizeof(*dst) && !ring_is_empty(hdr)) {
+                if (copy_to_user(dst, &dev->ents[hdr->read_idx],
+                        sizeof(*dst))) {
+                        ret = -EFAULT;
+                        goto unlock;
+                }
+                /* consume the slot, wrapping at the end of the ring */
+                if (++hdr->read_idx >= hdr->num_ents)
+                        hdr->read_idx = 0;
+                dst++;
+                nbytes += sizeof(*dst);
+                size -= sizeof(*dst);
+        }
+        gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", nbytes,
+                hdr->read_idx);
+        *off = hdr->read_idx;
+        ret = nbytes;
+unlock:
+        nvgpu_mutex_release(&dev->write_lock);
+        return ret;
+}
+/* TRACE_ENABLE ioctl: allow writes into the ring first, then ask the
+ * fecs_trace backend to start tracing. Always returns 0. */
+static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
+{
+        struct gk20a *g = dev->g;
+        gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
+        nvgpu_mutex_acquire(&dev->write_lock);
+        dev->write_enabled = true;
+        nvgpu_mutex_release(&dev->write_lock);
+        g->ops.fecs_trace.enable(g);
+        return 0;
+}
+/* TRACE_DISABLE ioctl: ask the fecs_trace backend to stop tracing, then
+ * block further writes into the ring. Always returns 0. */
+static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
+{
+        struct gk20a *g = dev->g;
+        gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
+        g->ops.fecs_trace.disable(g);
+        nvgpu_mutex_acquire(&dev->write_lock);
+        dev->write_enabled = false;
+        nvgpu_mutex_release(&dev->write_lock);
+        return 0;
+}
+/*
+ * (Re)allocate the ring buffer of @dev, requesting @size bytes from the
+ * fecs_trace.alloc_user_buffer hook, and cache its layout in @dev.
+ * Both callers in this file hold dev->write_lock.
+ *
+ * Returns -EBUSY while writes are enabled or the buffer is still mapped
+ * (vma_ref != 0), the hook's error code on allocation failure, else 0.
+ * On allocation failure dev->hdr is left NULL, because any previous
+ * buffer has already been released.
+ */
+static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
+                                        size_t size)
+{
+        struct gk20a *g = dev->g;
+        void *buf;
+        int err;
+        if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
+                return -EBUSY;
+        /* RING_SETUP runs after open() has already allocated a ring;
+         * release that one first so it is not leaked when replaced */
+        if (dev->hdr) {
+                g->ops.fecs_trace.free_user_buffer(g);
+                dev->hdr = NULL;
+        }
+        err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
+        if (err)
+                return err;
+        dev->hdr = buf;
+        /* entries start immediately after the ring header */
+        dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
+        dev->size = size;
+        dev->num_ents = dev->hdr->num_ents;
+        gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
+                dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
+        return 0;
+}
+/*
+ * Allocate a ring with vmalloc_user() and initialise its header.
+ * *size is rounded up to a multiple of PAGE_SIZE and written back; *buf
+ * receives the header address. Returns 0, or -ENOMEM if allocation fails.
+ */
+int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
+                void **buf, size_t *size)
+{
+        const size_t hdr_bytes = sizeof(struct nvgpu_ctxsw_ring_header);
+        const size_t ent_bytes = sizeof(struct nvgpu_ctxsw_trace_entry);
+        struct nvgpu_ctxsw_ring_header *ring;
+        *size = roundup(*size, PAGE_SIZE);
+        ring = vmalloc_user(*size);
+        if (!ring)
+                return -ENOMEM;
+        ring->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
+        ring->version = NVGPU_CTXSW_RING_HEADER_VERSION;
+        /* entries fill whatever space remains after the header */
+        ring->num_ents = (*size - hdr_bytes) / ent_bytes;
+        ring->ent_size = ent_bytes;
+        ring->drop_count = 0;
+        ring->read_idx = 0;
+        ring->write_idx = 0;
+        ring->write_seqno = 0;
+        *buf = ring;
+        return 0;
+}
+/* Free the ring buffer of device 0 with nvgpu_vfree(). The hdr pointer is
+ * not cleared here; that is left to the caller. Always returns 0. */
+int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
+{
+        nvgpu_vfree(g, g->ctxsw_trace->devs[0].hdr);
+        return 0;
+}
+/* RING_SETUP ioctl: allocate a ring of args->size bytes under write_lock.
+ * Returns -EINVAL when the size exceeds GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE,
+ * otherwise the result of gk20a_ctxsw_dev_alloc_buffer(). */
+static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
+        struct nvgpu_ctxsw_ring_setup_args *args)
+{
+        size_t req = args->size;
+        int err;
+        gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", req);
+        if (req <= GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) {
+                nvgpu_mutex_acquire(&dev->write_lock);
+                err = gk20a_ctxsw_dev_alloc_buffer(dev, req);
+                nvgpu_mutex_release(&dev->write_lock);
+        } else {
+                err = -EINVAL;
+        }
+        return err;
+}
+/* SET_FILTER ioctl: store the new filter under write_lock, then hand it to
+ * the backend's optional set_filter hook. Always returns 0. */
+static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
+        struct nvgpu_ctxsw_trace_filter_args *args)
+{
+        struct gk20a *g = dev->g;
+        nvgpu_mutex_acquire(&dev->write_lock);
+        dev->filter = args->filter;
+        nvgpu_mutex_release(&dev->write_lock);
+        /* the hook is optional */
+        if (!g->ops.fecs_trace.set_filter)
+                return 0;
+        g->ops.fecs_trace.set_filter(g, &dev->filter);
+        return 0;
+}
+/* GET_FILTER ioctl: copy the device's current filter into @args while
+ * holding write_lock. Always returns 0. */
+static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
+        struct nvgpu_ctxsw_trace_filter_args *args)
+{
+        struct nvgpu_mutex *lock = &dev->write_lock;
+        nvgpu_mutex_acquire(lock);
+        args->filter = dev->filter;
+        nvgpu_mutex_release(lock);
+        return 0;
+}
+/* POLL ioctl: between gk20a_busy() and gk20a_idle(), run the backend's
+ * optional flush hook and, if that succeeded, its poll hook.
+ * Returns the first error encountered, or the poll hook's result. */
+static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
+{
+        struct gk20a *g = dev->g;
+        int err;
+        gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
+        err = gk20a_busy(g);
+        if (err)
+                goto out;
+        if (g->ops.fecs_trace.flush) {
+                err = g->ops.fecs_trace.flush(g);
+                /* skip polling when the flush failed */
+                if (unlikely(err))
+                        goto idle;
+        }
+        err = g->ops.fecs_trace.poll(g);
+idle:
+        gk20a_idle(g);
+out:
+        return err;
+}
+/*
+ * open() handler. Takes a reference on the GPU, requires CAP_SYS_ADMIN,
+ * and allocates a ring sized for the "all tags" default filter. Only one
+ * opener is allowed at a time: an already allocated ring yields -EBUSY.
+ * The GPU reference is kept on success and dropped on any failure.
+ */
+int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
+{
+        /* only one VM for now */
+        const int vmid = 0;
+        struct nvgpu_os_linux *l;
+        struct gk20a *g;
+        struct gk20a_ctxsw_trace *trace;
+        struct gk20a_ctxsw_dev *dev;
+        size_t ring_bytes;
+        u32 max_ents;
+        int err;
+        l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
+        g = gk20a_get(&l->g);
+        if (!g)
+                return -ENODEV;
+        gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
+        if (!capable(CAP_SYS_ADMIN)) {
+                err = -EPERM;
+                goto put_ref;
+        }
+        err = gk20a_busy(g);
+        if (err)
+                goto put_ref;
+        trace = g->ctxsw_trace;
+        if (!trace) {
+                err = -ENODEV;
+                goto idle;
+        }
+        /* Allow only one user for this device */
+        dev = &trace->devs[vmid];
+        nvgpu_mutex_acquire(&dev->write_lock);
+        if (dev->hdr) {
+                err = -EBUSY;
+                goto unlock;
+        }
+        /* Start with every tag enabled and size the ring for the number
+         * of entries the backend can generate with that filter */
+        NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
+        max_ents = g->ops.fecs_trace.max_entries(g, &dev->filter);
+        ring_bytes = sizeof(struct nvgpu_ctxsw_ring_header) +
+                        max_ents * sizeof(struct nvgpu_ctxsw_trace_entry);
+        gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
+                ring_bytes, max_ents, sizeof(struct nvgpu_ctxsw_trace_entry));
+        err = gk20a_ctxsw_dev_alloc_buffer(dev, ring_bytes);
+        if (err)
+                goto unlock;
+        filp->private_data = dev;
+        gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
+                filp, dev, ring_bytes);
+unlock:
+        nvgpu_mutex_release(&dev->write_lock);
+idle:
+        gk20a_idle(g);
+put_ref:
+        /* the reference taken above is kept only for a successful open */
+        if (err)
+                gk20a_put(g);
+        return err;
+}
+/* release() handler: stop tracing, block further ring writes, free the
+ * ring buffer if one exists and drop the GPU reference held since open().
+ * Always returns 0. */
+int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
+{
+        struct gk20a_ctxsw_dev *dev = filp->private_data;
+        struct gk20a *g = dev->g;
+        gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
+        g->ops.fecs_trace.disable(g);
+        nvgpu_mutex_acquire(&dev->write_lock);
+        dev->write_enabled = false;
+        nvgpu_mutex_release(&dev->write_lock);
+        /* a NULL hdr afterwards marks the device as free for the next open */
+        if (dev->hdr != NULL) {
+                g->ops.fecs_trace.free_user_buffer(g);
+                dev->hdr = NULL;
+        }
+        gk20a_put(g);
+        return 0;
+}
+/*
+ * ioctl() dispatcher for the ctxsw trace device.
+ *
+ * Rejects commands with the wrong magic, a number outside
+ * 1..NVGPU_CTXSW_IOCTL_LAST or an argument larger than
+ * NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE with -EINVAL. The argument is staged in
+ * a zeroed on-stack buffer: copied in for _IOC_WRITE commands and copied
+ * back for _IOC_READ commands when the handler succeeded.
+ *
+ * Returns the handler's result, -ENOTTY for an unknown command, or
+ * -EFAULT when either user copy fails.
+ */
+long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
+        unsigned long arg)
+{
+        struct gk20a_ctxsw_dev *dev = filp->private_data;
+        struct gk20a *g = dev->g;
+        u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
+        int err = 0;
+        gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
+        if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
+                (_IOC_NR(cmd) == 0) ||
+                (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
+                (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
+                return -EINVAL;
+        /* zero first so no stale stack bytes can be copied back out */
+        memset(buf, 0, sizeof(buf));
+        if (_IOC_DIR(cmd) & _IOC_WRITE) {
+                if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
+                        return -EFAULT;
+        }
+        switch (cmd) {
+        case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
+                err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
+                break;
+        case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
+                err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
+                break;
+        case NVGPU_CTXSW_IOCTL_RING_SETUP:
+                err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
+                        (struct nvgpu_ctxsw_ring_setup_args *) buf);
+                break;
+        case NVGPU_CTXSW_IOCTL_SET_FILTER:
+                err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
+                        (struct nvgpu_ctxsw_trace_filter_args *) buf);
+                break;
+        case NVGPU_CTXSW_IOCTL_GET_FILTER:
+                err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
+                        (struct nvgpu_ctxsw_trace_filter_args *) buf);
+                break;
+        case NVGPU_CTXSW_IOCTL_POLL:
+                err = gk20a_ctxsw_dev_ioctl_poll(dev);
+                break;
+        default:
+                dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
+                        cmd);
+                err = -ENOTTY;
+        }
+        /* copy_to_user() returns the number of bytes it could NOT copy;
+         * that positive count must not leak out as the ioctl result */
+        if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) {
+                if (copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)))
+                        err = -EFAULT;
+        }
+        return err;
+}
+/* poll() handler: register on readout_wq and report POLLIN | POLLRDNORM
+ * when the ring holds at least one entry, otherwise 0. */
+unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
+{
+        struct gk20a_ctxsw_dev *dev = filp->private_data;
+        struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
+        unsigned int events;
+        gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
+        nvgpu_mutex_acquire(&dev->write_lock);
+        poll_wait(filp, &dev->readout_wq.wq, wait);
+        events = ring_is_empty(hdr) ? 0 : (POLLIN | POLLRDNORM);
+        nvgpu_mutex_release(&dev->write_lock);
+        return events;
+}
+/* VMA open callback: count one more mapping of the ring buffer. */
+static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
+{
+        struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
+        nvgpu_atomic_t *refs = &dev->vma_ref;
+        nvgpu_atomic_inc(refs);
+        gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
+                nvgpu_atomic_read(refs));
+}
+/* VMA close callback: count one mapping of the ring buffer fewer. */
+static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
+{
+        struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
+        nvgpu_atomic_t *refs = &dev->vma_ref;
+        nvgpu_atomic_dec(refs);
+        gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
+                nvgpu_atomic_read(refs));
+}
+static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { /* installed on the VMA by gk20a_ctxsw_dev_mmap() */
+        .open = gk20a_ctxsw_dev_vma_open, /* increments dev->vma_ref */
+        .close = gk20a_ctxsw_dev_vma_close, /* decrements dev->vma_ref */
+};
+/* Map the ring buffer of device 0 into @vma with remap_vmalloc_range()
+ * and return its result. */
+int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
+                                struct vm_area_struct *vma)
+{
+        struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
+        return remap_vmalloc_range(vma, dev->hdr, 0);
+}
+/* mmap() handler: let the backend map the ring into @vma; on success hook
+ * up the VMA callbacks and account for this first mapping.
+ * Returns the backend's result. */
+int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+        struct gk20a_ctxsw_dev *dev = filp->private_data;
+        int ret;
+        gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
+                vma->vm_start, vma->vm_end);
+        ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
+        if (unlikely(ret))
+                return ret;
+        vma->vm_private_data = dev;
+        vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
+        /* invoke the open callback by hand so vma_ref counts this mapping */
+        vma->vm_ops->open(vma);
+        return ret;
+}
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+/* Put every device of g->ctxsw_trace into its initial state: no ring,
+ * writes disabled, no mappings. Returns 0, or the error from the first
+ * nvgpu_mutex_init() that fails. */
+static int gk20a_ctxsw_init_devs(struct gk20a *g)
+{
+        struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
+        int idx;
+        for (idx = 0; idx < GK20A_CTXSW_TRACE_NUM_DEVS; idx++) {
+                struct gk20a_ctxsw_dev *dev = &trace->devs[idx];
+                int err;
+                dev->g = g;
+                dev->hdr = NULL;
+                dev->write_enabled = false;
+                nvgpu_cond_init(&dev->readout_wq);
+                err = nvgpu_mutex_init(&dev->write_lock);
+                if (err)
+                        return err;
+                nvgpu_atomic_set(&dev->vma_ref, 0);
+        }
+        return 0;
+}
+#endif
+/*
+ * Allocate and initialise g->ctxsw_trace, then run the backend's init hook.
+ * Does nothing (returns 0) when the backend has no init hook, when the
+ * state already exists, or when CONFIG_GK20A_CTXSW_TRACE is not set.
+ * On failure the fecs_trace ops are zeroed, the state is freed and the
+ * error is returned.
+ */
+int gk20a_ctxsw_trace_init(struct gk20a *g)
+{
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+        struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
+        int err;
+        gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);
+        /* nothing to do if tracing is unsupported or already set up */
+        if (!g->ops.fecs_trace.init || likely(trace))
+                return 0;
+        trace = nvgpu_kzalloc(g, sizeof(*trace));
+        if (unlikely(!trace))
+                return -ENOMEM;
+        g->ctxsw_trace = trace;
+        err = gk20a_ctxsw_init_devs(g);
+        if (!err)
+                err = g->ops.fecs_trace.init(g);
+        if (likely(!err))
+                return 0;
+        /* undo: drop the ops so later callers see tracing as unsupported */
+        memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
+        nvgpu_kfree(g, trace);
+        g->ctxsw_trace = NULL;
+        return err;
+#else
+        return 0;
+#endif
+}
+/* Tear down g->ctxsw_trace: destroy each device's write_lock, free the
+ * state, clear the pointer and finally call the backend's deinit hook.
+ * No-op when the state was never allocated. */
+void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
+{
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+        struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
+        int idx;
+        if (!trace)
+                return;
+        for (idx = 0; idx < GK20A_CTXSW_TRACE_NUM_DEVS; idx++)
+                nvgpu_mutex_destroy(&trace->devs[idx].write_lock);
+        nvgpu_kfree(g, g->ctxsw_trace);
+        g->ctxsw_trace = NULL;
+        g->ops.fecs_trace.deinit(g);
+#endif
+}
+/*
+ * Append @entry to the ring of the device selected by entry->vmid.
+ *
+ * The entry is stamped with the ring's next sequence number and is then
+ * either stored or dropped. Return values:
+ *   0        stored, filtered out by the device's filter, or tracing
+ *            state not allocated
+ *   -ENODEV  vmid out of range, or the device has no ring buffer
+ *   -ENOSPC  ring full, or write index corrupt (tracing is then disabled)
+ *   -EBUSY   writes are currently disabled for the device
+ * Drops caused by a full ring, disabled writes or a corrupt index are
+ * counted in hdr->drop_count; filtered entries are not.
+ */
+int gk20a_ctxsw_trace_write(struct gk20a *g,
+                struct nvgpu_ctxsw_trace_entry *entry)
+{
+        struct nvgpu_ctxsw_ring_header *hdr;
+        struct gk20a_ctxsw_dev *dev;
+        int ret = 0;
+        const char *reason;
+        u32 write_idx;
+        if (!g->ctxsw_trace)
+                return 0;
+        if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
+                return -ENODEV;
+        dev = &g->ctxsw_trace->devs[entry->vmid];
+        nvgpu_mutex_acquire(&dev->write_lock);
+        /* sample the ring pointer only once the lock is held, so the
+         * NULL check below cannot act on a value read before a
+         * concurrent lock holder replaced the buffer */
+        hdr = dev->hdr;
+        gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
+                "dev=%p hdr=%p", dev, hdr);
+        if (unlikely(!hdr)) {
+                /* device has been released */
+                ret = -ENODEV;
+                goto done;
+        }
+        write_idx = hdr->write_idx;
+        /* validate against the kernel-side copy of the ring size */
+        if (write_idx >= dev->num_ents) {
+                nvgpu_err(dev->g,
+                        "write_idx=%u out of range [0..%u]",
+                        write_idx, dev->num_ents);
+                ret = -ENOSPC;
+                reason = "write_idx out of range";
+                goto disable;
+        }
+        /* sequence numbers advance even for entries that get dropped */
+        entry->seqno = hdr->write_seqno++;
+        if (!dev->write_enabled) {
+                ret = -EBUSY;
+                reason = "write disabled";
+                goto drop;
+        }
+        if (unlikely(ring_is_full(hdr))) {
+                ret = -ENOSPC;
+                reason = "user fifo full";
+                goto drop;
+        }
+        if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
+                reason = "filtered out";
+                goto filter;
+        }
+        gk20a_dbg(gpu_dbg_ctxsw,
+                "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
+                entry->seqno, entry->context_id, entry->pid,
+                entry->tag, entry->timestamp);
+        dev->ents[write_idx] = *entry;
+        /* ensure record is written before updating write index */
+        nvgpu_smp_wmb();
+        write_idx++;
+        if (unlikely(write_idx >= hdr->num_ents))
+                write_idx = 0;
+        hdr->write_idx = write_idx;
+        gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
+                hdr->read_idx, hdr->write_idx, ring_len(hdr));
+        nvgpu_mutex_release(&dev->write_lock);
+        return ret;
+disable:
+        g->ops.fecs_trace.disable(g);
+drop:
+        hdr->drop_count++;
+filter:
+        gk20a_dbg(gpu_dbg_ctxsw,
+                        "dropping seqno=%d context_id=%08x pid=%lld "
+                        "tag=%x time=%llx (%s)",
+                        entry->seqno, entry->context_id, entry->pid,
+                        entry->tag, entry->timestamp, reason);
+done:
+        nvgpu_mutex_release(&dev->write_lock);
+        return ret;
+}
+/* Signal readers waiting on the readout queue of device @vmid.
+ * No-op when the trace state is not allocated. */
+void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
+{
+        struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
+        if (!trace)
+                return;
+        nvgpu_cond_signal_interruptible(&trace->devs[vmid].readout_wq);
+}
+/* Record an ENGINE_RESET entry for channel @ch in the ring of VM 0, wake
+ * any reader, and emit the gk20a_channel_reset tracepoint. When tracing is
+ * configured but its state is not allocated, nothing is emitted. */
+void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
+{
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+        struct nvgpu_ctxsw_trace_entry reset_ev = {
+                .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
+                .pid = ch->tgid,
+                .context_id = 0,
+                .vmid = 0,
+        };
+        if (!g->ctxsw_trace)
+                return;
+        /* timestamp the event with the current ptimer value */
+        g->ops.bus.read_ptimer(g, &reset_ev.timestamp);
+        gk20a_ctxsw_trace_write(g, &reset_ev);
+        gk20a_ctxsw_trace_wake_up(g, 0);
+#endif
+        trace_gk20a_channel_reset(ch->chid, ch->tsgid);
+}
+/* Record an ENGINE_RESET entry for TSG @tsg in the ring of VM 0, wake any
+ * reader, and emit the gk20a_channel_reset tracepoint with channel id ~0.
+ * When tracing is configured but its state is not allocated, nothing is
+ * emitted. */
+void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
+{
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+        struct nvgpu_ctxsw_trace_entry reset_ev = {
+                .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
+                .pid = tsg->tgid,
+                .context_id = 0,
+                .vmid = 0,
+        };
+        if (!g->ctxsw_trace)
+                return;
+        /* timestamp the event with the current ptimer value */
+        g->ops.bus.read_ptimer(g, &reset_ev.timestamp);
+        gk20a_ctxsw_trace_write(g, &reset_ev);
+        gk20a_ctxsw_trace_wake_up(g, 0);
+#endif
+        trace_gk20a_channel_reset(~0, tsg->tsgid);
+}

diff --git a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c new file mode 100644 index 00000000..8268bf60 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c
@@ -0,0 +1,720 @@
	1	/*
	2	* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
	3	*
	4	* This program is free software; you can redistribute it and/or modify it
	5	* under the terms and conditions of the GNU General Public License,
	6	* version 2, as published by the Free Software Foundation.
	7	*
	8	* This program is distributed in the hope it will be useful, but WITHOUT
	9	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	10	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
	11	* more details.
	12	*
	13	* You should have received a copy of the GNU General Public License
	14	* along with this program. If not, see <http://www.gnu.org/licenses/>.
	15	*/
	16
	17	#include <linux/wait.h>
	18	#include <linux/ktime.h>
	19	#include <linux/uaccess.h>
	20	#include <linux/poll.h>
	21	#include <trace/events/gk20a.h>
	22	#include <uapi/linux/nvgpu.h>
	23
	24	#include "gk20a/gk20a.h"
	25	#include "gk20a/gr_gk20a.h"
	26
	27	#include <nvgpu/kmem.h>
	28	#include <nvgpu/log.h>
	29	#include <nvgpu/atomic.h>
	30	#include <nvgpu/barrier.h>
	31
	32	#include "platform_gk20a.h"
	33	#include "os_linux.h"
	34	#include "ctxsw_trace.h"
	35
	36	#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
	37	#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
	38
	39	#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE)
	40
	41	/* Userland-facing FIFO (one global + eventually one per VM) */
	42	struct gk20a_ctxsw_dev {
	43	struct gk20a *g;
	44
	45	struct nvgpu_ctxsw_ring_header *hdr;
	46	struct nvgpu_ctxsw_trace_entry *ents;
	47	struct nvgpu_ctxsw_trace_filter filter;
	48	bool write_enabled;
	49	struct nvgpu_cond readout_wq;
	50	size_t size;
	51	u32 num_ents;
	52
	53	nvgpu_atomic_t vma_ref;
	54
	55	struct nvgpu_mutex write_lock;
	56	};
	57
	58
	59	struct gk20a_ctxsw_trace {
	60	struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
	61	};
	62
	63	static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
	64	{
	65	return (hdr->write_idx == hdr->read_idx);
	66	}
	67
	68	static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
	69	{
	70	return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
	71	}
	72
	73	static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
	74	{
	75	return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
	76	}
	77
	78	ssize_t gk20a_ctxsw_dev_read(struct file filp, char __user buf, size_t size,
	79	loff_t *off)
	80	{
	81	struct gk20a_ctxsw_dev *dev = filp->private_data;
	82	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	83	struct nvgpu_ctxsw_trace_entry __user *entry =
	84	(struct nvgpu_ctxsw_trace_entry *) buf;
	85	size_t copied = 0;
	86	int err;
	87
	88	gk20a_dbg(gpu_dbg_fn\|gpu_dbg_ctxsw,
	89	"filp=%p buf=%p size=%zu", filp, buf, size);
	90
	91	nvgpu_mutex_acquire(&dev->write_lock);
	92	while (ring_is_empty(hdr)) {
	93	nvgpu_mutex_release(&dev->write_lock);
	94	if (filp->f_flags & O_NONBLOCK)
	95	return -EAGAIN;
	96	err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
	97	!ring_is_empty(hdr), 0);
	98	if (err)
	99	return err;
	100	nvgpu_mutex_acquire(&dev->write_lock);
	101	}
	102
	103	while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
	104	if (ring_is_empty(hdr))
	105	break;
	106
	107	if (copy_to_user(entry, &dev->ents[hdr->read_idx],
	108	sizeof(*entry))) {
	109	nvgpu_mutex_release(&dev->write_lock);
	110	return -EFAULT;
	111	}
	112
	113	hdr->read_idx++;
	114	if (hdr->read_idx >= hdr->num_ents)
	115	hdr->read_idx = 0;
	116
	117	entry++;
	118	copied += sizeof(*entry);
	119	size -= sizeof(*entry);
	120	}
	121
	122	gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
	123	hdr->read_idx);
	124
	125	*off = hdr->read_idx;
	126	nvgpu_mutex_release(&dev->write_lock);
	127
	128	return copied;
	129	}
	130
	131	static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
	132	{
	133	gk20a_dbg(gpu_dbg_fn\|gpu_dbg_ctxsw, "trace enabled");
	134	nvgpu_mutex_acquire(&dev->write_lock);
	135	dev->write_enabled = true;
	136	nvgpu_mutex_release(&dev->write_lock);
	137	dev->g->ops.fecs_trace.enable(dev->g);
	138	return 0;
	139	}
	140
	141	static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
	142	{
	143	gk20a_dbg(gpu_dbg_fn\|gpu_dbg_ctxsw, "trace disabled");
	144	dev->g->ops.fecs_trace.disable(dev->g);
	145	nvgpu_mutex_acquire(&dev->write_lock);
	146	dev->write_enabled = false;
	147	nvgpu_mutex_release(&dev->write_lock);
	148	return 0;
	149	}
	150
	151	static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
	152	size_t size)
	153	{
	154	struct gk20a *g = dev->g;
	155	void *buf;
	156	int err;
	157
	158	if ((dev->write_enabled) \|\| (nvgpu_atomic_read(&dev->vma_ref)))
	159	return -EBUSY;
	160
	161	err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
	162	if (err)
	163	return err;
	164
	165
	166	dev->hdr = buf;
	167	dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
	168	dev->size = size;
	169	dev->num_ents = dev->hdr->num_ents;
	170
	171	gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
	172	dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
	173	return 0;
	174	}
	175
	176	int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
	177	void *buf, size_t size)
	178	{
	179	struct nvgpu_ctxsw_ring_header *hdr;
	180
	181	size = roundup(size, PAGE_SIZE);
	182	hdr = vmalloc_user(*size);
	183	if (!hdr)
	184	return -ENOMEM;
	185
	186	hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
	187	hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
	188	hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
	189	/ sizeof(struct nvgpu_ctxsw_trace_entry);
	190	hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
	191	hdr->drop_count = 0;
	192	hdr->read_idx = 0;
	193	hdr->write_idx = 0;
	194	hdr->write_seqno = 0;
	195
	196	*buf = hdr;
	197	return 0;
	198	}
	199
	200	int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
	201	{
	202	struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
	203
	204	nvgpu_vfree(g, dev->hdr);
	205	return 0;
	206	}
	207
	208	static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
	209	struct nvgpu_ctxsw_ring_setup_args *args)
	210	{
	211	size_t size = args->size;
	212	int ret;
	213
	214	gk20a_dbg(gpu_dbg_fn\|gpu_dbg_ctxsw, "size=%zu", size);
	215
	216	if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
	217	return -EINVAL;
	218
	219	nvgpu_mutex_acquire(&dev->write_lock);
	220	ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
	221	nvgpu_mutex_release(&dev->write_lock);
	222
	223	return ret;
	224	}
	225
	226	static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
	227	struct nvgpu_ctxsw_trace_filter_args *args)
	228	{
	229	struct gk20a *g = dev->g;
	230
	231	nvgpu_mutex_acquire(&dev->write_lock);
	232	dev->filter = args->filter;
	233	nvgpu_mutex_release(&dev->write_lock);
	234
	235	if (g->ops.fecs_trace.set_filter)
	236	g->ops.fecs_trace.set_filter(g, &dev->filter);
	237	return 0;
	238	}
	239
	240	static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
	241	struct nvgpu_ctxsw_trace_filter_args *args)
	242	{
	243	nvgpu_mutex_acquire(&dev->write_lock);
	244	args->filter = dev->filter;
	245	nvgpu_mutex_release(&dev->write_lock);
	246
	247	return 0;
	248	}
	249
	250	static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
	251	{
	252	struct gk20a *g = dev->g;
	253	int err;
	254
	255	gk20a_dbg(gpu_dbg_fn\|gpu_dbg_ctxsw, "");
	256
	257	err = gk20a_busy(g);
	258	if (err)
	259	return err;
	260
	261	if (g->ops.fecs_trace.flush)
	262	err = g->ops.fecs_trace.flush(g);
	263
	264	if (likely(!err))
	265	err = g->ops.fecs_trace.poll(g);
	266
	267	gk20a_idle(g);
	268	return err;
	269	}
	270
	271	int gk20a_ctxsw_dev_open(struct inode inode, struct file filp)
	272	{
	273	struct nvgpu_os_linux *l;
	274	struct gk20a *g;
	275	struct gk20a_ctxsw_trace *trace;
	276	struct gk20a_ctxsw_dev *dev;
	277	int err;
	278	size_t size;
	279	u32 n;
	280
	281	/* only one VM for now */
	282	const int vmid = 0;
	283
	284	l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
	285	g = gk20a_get(&l->g);
	286	if (!g)
	287	return -ENODEV;
	288
	289	gk20a_dbg(gpu_dbg_fn\|gpu_dbg_ctxsw, "g=%p", g);
	290
	291	if (!capable(CAP_SYS_ADMIN)) {
	292	err = -EPERM;
	293	goto free_ref;
	294	}
	295
	296	err = gk20a_busy(g);
	297	if (err)
	298	goto free_ref;
	299
	300	trace = g->ctxsw_trace;
	301	if (!trace) {
	302	err = -ENODEV;
	303	goto idle;
	304	}
	305
	306	/* Allow only one user for this device */
	307	dev = &trace->devs[vmid];
	308	nvgpu_mutex_acquire(&dev->write_lock);
	309	if (dev->hdr) {
	310	err = -EBUSY;
	311	goto done;
	312	}
	313
	314	/* By default, allocate ring buffer big enough to accommodate
	315	* FECS records with default event filter */
	316
	317	/* enable all traces by default */
	318	NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
	319
	320	/* compute max number of entries generated with this filter */
	321	n = g->ops.fecs_trace.max_entries(g, &dev->filter);
	322
	323	size = sizeof(struct nvgpu_ctxsw_ring_header) +
	324	n * sizeof(struct nvgpu_ctxsw_trace_entry);
	325	gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
	326	size, n, sizeof(struct nvgpu_ctxsw_trace_entry));
	327
	328	err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
	329	if (!err) {
	330	filp->private_data = dev;
	331	gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
	332	filp, dev, size);
	333	}
	334
	335	done:
	336	nvgpu_mutex_release(&dev->write_lock);
	337
	338	idle:
	339	gk20a_idle(g);
	340	free_ref:
	341	if (err)
	342	gk20a_put(g);
	343	return err;
	344	}
	345
	346	int gk20a_ctxsw_dev_release(struct inode inode, struct file filp)
	347	{
	348	struct gk20a_ctxsw_dev *dev = filp->private_data;
	349	struct gk20a *g = dev->g;
	350
	351	gk20a_dbg(gpu_dbg_fn\|gpu_dbg_ctxsw, "dev: %p", dev);
	352
	353	g->ops.fecs_trace.disable(g);
	354
	355	nvgpu_mutex_acquire(&dev->write_lock);
	356	dev->write_enabled = false;
	357	nvgpu_mutex_release(&dev->write_lock);
	358
	359	if (dev->hdr) {
	360	dev->g->ops.fecs_trace.free_user_buffer(dev->g);
	361	dev->hdr = NULL;
	362	}
	363	gk20a_put(g);
	364	return 0;
	365	}
	366
	367	long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
	368	unsigned long arg)
	369	{
	370	struct gk20a_ctxsw_dev *dev = filp->private_data;
	371	struct gk20a *g = dev->g;
	372	u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
	373	int err = 0;
	374
	375	gk20a_dbg(gpu_dbg_fn\|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
	376
	377	if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) \|\|
	378	(_IOC_NR(cmd) == 0) \|\|
	379	(_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) \|\|
	380	(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
	381	return -EINVAL;
	382
	383	memset(buf, 0, sizeof(buf));
	384	if (_IOC_DIR(cmd) & _IOC_WRITE) {
	385	if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
	386	return -EFAULT;
	387	}
	388
	389	switch (cmd) {
	390	case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
	391	err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
	392	break;
	393	case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
	394	err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
	395	break;
	396	case NVGPU_CTXSW_IOCTL_RING_SETUP:
	397	err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
	398	(struct nvgpu_ctxsw_ring_setup_args *) buf);
	399	break;
	400	case NVGPU_CTXSW_IOCTL_SET_FILTER:
	401	err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
	402	(struct nvgpu_ctxsw_trace_filter_args *) buf);
	403	break;
	404	case NVGPU_CTXSW_IOCTL_GET_FILTER:
	405	err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
	406	(struct nvgpu_ctxsw_trace_filter_args *) buf);
	407	break;
	408	case NVGPU_CTXSW_IOCTL_POLL:
	409	err = gk20a_ctxsw_dev_ioctl_poll(dev);
	410	break;
	411	default:
	412	dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
	413	cmd);
	414	err = -ENOTTY;
	415	}
	416
	417	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
	418	err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
	419
	420	return err;
	421	}
	422
	423	unsigned int gk20a_ctxsw_dev_poll(struct file filp, poll_table wait)
	424	{
	425	struct gk20a_ctxsw_dev *dev = filp->private_data;
	426	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	427	unsigned int mask = 0;
	428
	429	gk20a_dbg(gpu_dbg_fn\|gpu_dbg_ctxsw, "");
	430
	431	nvgpu_mutex_acquire(&dev->write_lock);
	432	poll_wait(filp, &dev->readout_wq.wq, wait);
	433	if (!ring_is_empty(hdr))
	434	mask \|= POLLIN \| POLLRDNORM;
	435	nvgpu_mutex_release(&dev->write_lock);
	436
	437	return mask;
	438	}
	439
	440	static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
	441	{
	442	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
	443
	444	nvgpu_atomic_inc(&dev->vma_ref);
	445	gk20a_dbg(gpu_dbg_fn\|gpu_dbg_ctxsw, "vma_ref=%d",
	446	nvgpu_atomic_read(&dev->vma_ref));
	447	}
	448
	449	static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
	450	{
	451	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
	452
	453	nvgpu_atomic_dec(&dev->vma_ref);
	454	gk20a_dbg(gpu_dbg_fn\|gpu_dbg_ctxsw, "vma_ref=%d",
	455	nvgpu_atomic_read(&dev->vma_ref));
	456	}
	457
	458	static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
	459	.open = gk20a_ctxsw_dev_vma_open,
	460	.close = gk20a_ctxsw_dev_vma_close,
	461	};
	462
	463	int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
	464	struct vm_area_struct *vma)
	465	{
	466	return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
	467	}
	468
	469	int gk20a_ctxsw_dev_mmap(struct file filp, struct vm_area_struct vma)
	470	{
	471	struct gk20a_ctxsw_dev *dev = filp->private_data;
	472	int ret;
	473
	474	gk20a_dbg(gpu_dbg_fn\|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
	475	vma->vm_start, vma->vm_end);
	476
	477	ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
	478	if (likely(!ret)) {
	479	vma->vm_private_data = dev;
	480	vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
	481	vma->vm_ops->open(vma);
	482	}
	483
	484	return ret;
	485	}
	486
	487	#ifdef CONFIG_GK20A_CTXSW_TRACE
	488	static int gk20a_ctxsw_init_devs(struct gk20a *g)
	489	{
	490	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	491	struct gk20a_ctxsw_dev *dev = trace->devs;
	492	int err;
	493	int i;
	494
	495	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
	496	dev->g = g;
	497	dev->hdr = NULL;
	498	dev->write_enabled = false;
	499	nvgpu_cond_init(&dev->readout_wq);
	500	err = nvgpu_mutex_init(&dev->write_lock);
	501	if (err)
	502	return err;
	503	nvgpu_atomic_set(&dev->vma_ref, 0);
	504	dev++;
	505	}
	506	return 0;
	507	}
	508	#endif
	509
	510	int gk20a_ctxsw_trace_init(struct gk20a *g)
	511	{
	512	#ifdef CONFIG_GK20A_CTXSW_TRACE
	513	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	514	int err;
	515
	516	gk20a_dbg(gpu_dbg_fn\|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);
	517
	518	/* if tracing is not supported, skip this */
	519	if (!g->ops.fecs_trace.init)
	520	return 0;
	521
	522	if (likely(trace))
	523	return 0;
	524
	525	trace = nvgpu_kzalloc(g, sizeof(*trace));
	526	if (unlikely(!trace))
	527	return -ENOMEM;
	528	g->ctxsw_trace = trace;
	529
	530	err = gk20a_ctxsw_init_devs(g);
	531	if (err)
	532	goto fail;
	533
	534	err = g->ops.fecs_trace.init(g);
	535	if (unlikely(err))
	536	goto fail;
	537
	538	return 0;
	539
	540	fail:
	541	memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
	542	nvgpu_kfree(g, trace);
	543	g->ctxsw_trace = NULL;
	544	return err;
	545	#else
	546	return 0;
	547	#endif
	548	}
	549
	550	void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
	551	{
	552	#ifdef CONFIG_GK20A_CTXSW_TRACE
	553	struct gk20a_ctxsw_trace *trace;
	554	struct gk20a_ctxsw_dev *dev;
	555	int i;
	556
	557	if (!g->ctxsw_trace)
	558	return;
	559
	560	trace = g->ctxsw_trace;
	561	dev = trace->devs;
	562
	563	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
	564	nvgpu_mutex_destroy(&dev->write_lock);
	565	dev++;
	566	}
	567
	568	nvgpu_kfree(g, g->ctxsw_trace);
	569	g->ctxsw_trace = NULL;
	570
	571	g->ops.fecs_trace.deinit(g);
	572	#endif
	573	}
	574
	575	int gk20a_ctxsw_trace_write(struct gk20a *g,
	576	struct nvgpu_ctxsw_trace_entry *entry)
	577	{
	578	struct nvgpu_ctxsw_ring_header *hdr;
	579	struct gk20a_ctxsw_dev *dev;
	580	int ret = 0;
	581	const char *reason;
	582	u32 write_idx;
	583
	584	if (!g->ctxsw_trace)
	585	return 0;
	586
	587	if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
	588	return -ENODEV;
	589
	590	dev = &g->ctxsw_trace->devs[entry->vmid];
	591	hdr = dev->hdr;
	592
	593	gk20a_dbg(gpu_dbg_fn \| gpu_dbg_ctxsw,
	594	"dev=%p hdr=%p", dev, hdr);
	595
	596	nvgpu_mutex_acquire(&dev->write_lock);
	597
	598	if (unlikely(!hdr)) {
	599	/* device has been released */
	600	ret = -ENODEV;
	601	goto done;
	602	}
	603
	604	write_idx = hdr->write_idx;
	605	if (write_idx >= dev->num_ents) {
	606	nvgpu_err(dev->g,
	607	"write_idx=%u out of range [0..%u]",
	608	write_idx, dev->num_ents);
	609	ret = -ENOSPC;
	610	reason = "write_idx out of range";
	611	goto disable;
	612	}
	613
	614	entry->seqno = hdr->write_seqno++;
	615
	616	if (!dev->write_enabled) {
	617	ret = -EBUSY;
	618	reason = "write disabled";
	619	goto drop;
	620	}
	621
	622	if (unlikely(ring_is_full(hdr))) {
	623	ret = -ENOSPC;
	624	reason = "user fifo full";
	625	goto drop;
	626	}
	627
	628	if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
	629	reason = "filtered out";
	630	goto filter;
	631	}
	632
	633	gk20a_dbg(gpu_dbg_ctxsw,
	634	"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
	635	entry->seqno, entry->context_id, entry->pid,
	636	entry->tag, entry->timestamp);
	637
	638	dev->ents[write_idx] = *entry;
	639
	640	/* ensure record is written before updating write index */
	641	nvgpu_smp_wmb();
	642
	643	write_idx++;
	644	if (unlikely(write_idx >= hdr->num_ents))
	645	write_idx = 0;
	646	hdr->write_idx = write_idx;
	647	gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
	648	hdr->read_idx, hdr->write_idx, ring_len(hdr));
	649
	650	nvgpu_mutex_release(&dev->write_lock);
	651	return ret;
	652
	653	disable:
	654	g->ops.fecs_trace.disable(g);
	655
	656	drop:
	657	hdr->drop_count++;
	658
	659	filter:
	660	gk20a_dbg(gpu_dbg_ctxsw,
	661	"dropping seqno=%d context_id=%08x pid=%lld "
	662	"tag=%x time=%llx (%s)",
	663	entry->seqno, entry->context_id, entry->pid,
	664	entry->tag, entry->timestamp, reason);
	665
	666	done:
	667	nvgpu_mutex_release(&dev->write_lock);
	668	return ret;
	669	}
	670
	671	void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
	672	{
	673	struct gk20a_ctxsw_dev *dev;
	674
	675	if (!g->ctxsw_trace)
	676	return;
	677
	678	dev = &g->ctxsw_trace->devs[vmid];
	679	nvgpu_cond_signal_interruptible(&dev->readout_wq);
	680	}
	681
	682	void gk20a_ctxsw_trace_channel_reset(struct gk20a g, struct channel_gk20a ch)
	683	{
	684	#ifdef CONFIG_GK20A_CTXSW_TRACE
	685	struct nvgpu_ctxsw_trace_entry entry = {
	686	.vmid = 0,
	687	.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
	688	.context_id = 0,
	689	.pid = ch->tgid,
	690	};
	691
	692	if (!g->ctxsw_trace)
	693	return;
	694
	695	g->ops.bus.read_ptimer(g, &entry.timestamp);
	696	gk20a_ctxsw_trace_write(g, &entry);
	697	gk20a_ctxsw_trace_wake_up(g, 0);
	698	#endif
	699	trace_gk20a_channel_reset(ch->chid, ch->tsgid);
	700	}
	701
	702	void gk20a_ctxsw_trace_tsg_reset(struct gk20a g, struct tsg_gk20a tsg)
	703	{
	704	#ifdef CONFIG_GK20A_CTXSW_TRACE
	705	struct nvgpu_ctxsw_trace_entry entry = {
	706	.vmid = 0,
	707	.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
	708	.context_id = 0,
	709	.pid = tsg->tgid,
	710	};
	711
	712	if (!g->ctxsw_trace)
	713	return;
	714
	715	g->ops.bus.read_ptimer(g, &entry.timestamp);
	716	gk20a_ctxsw_trace_write(g, &entry);
	717	gk20a_ctxsw_trace_wake_up(g, 0);
	718	#endif
	719	trace_gk20a_channel_reset(~0, tsg->tsgid);
	720	}