Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c	727
1 file changed, 0 insertions(+), 727 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
deleted file mode 100644
index fb33de23..00000000
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ /dev/null
@@ -1,727 +0,0 @@
/*
 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <asm/barrier.h>
#include <linux/wait.h>
#include <linux/ktime.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <trace/events/gk20a.h>
#include <uapi/linux/nvgpu.h>

#include <nvgpu/kmem.h>

#include "ctxsw_trace_gk20a.h"
#include "gk20a.h"
#include "platform_gk20a.h"
#include "gr_gk20a.h"
#include "common/linux/os_linux.h"

#include <nvgpu/log.h>
#include <nvgpu/atomic.h>
#include <nvgpu/barrier.h>

#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>

#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE	(128*PAGE_SIZE)

/* Userland-facing FIFO (one global + eventually one per VM) */
struct gk20a_ctxsw_dev {
	struct gk20a *g;

	struct nvgpu_ctxsw_ring_header *hdr;
	struct nvgpu_ctxsw_trace_entry *ents;
	struct nvgpu_ctxsw_trace_filter filter;
	bool write_enabled;
	struct nvgpu_cond readout_wq;
	size_t size;
	u32 num_ents;

	nvgpu_atomic_t vma_ref;

	struct nvgpu_mutex write_lock;
};


struct gk20a_ctxsw_trace {
	struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
};

static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx == hdr->read_idx);
}

static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
{
	return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
}

static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
}

ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
	loff_t *off)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	struct nvgpu_ctxsw_trace_entry __user *entry =
		(struct nvgpu_ctxsw_trace_entry *) buf;
	size_t copied = 0;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
		"filp=%p buf=%p size=%zu", filp, buf, size);

	nvgpu_mutex_acquire(&dev->write_lock);
	while (ring_is_empty(hdr)) {
		nvgpu_mutex_release(&dev->write_lock);
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
			!ring_is_empty(hdr), 0);
		if (err)
			return err;
		nvgpu_mutex_acquire(&dev->write_lock);
	}

	while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
		if (ring_is_empty(hdr))
			break;

		if (copy_to_user(entry, &dev->ents[hdr->read_idx],
			sizeof(*entry))) {
			nvgpu_mutex_release(&dev->write_lock);
			return -EFAULT;
		}

		hdr->read_idx++;
		if (hdr->read_idx >= hdr->num_ents)
			hdr->read_idx = 0;

		entry++;
		copied += sizeof(*entry);
		size -= sizeof(*entry);
	}

	gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
		hdr->read_idx);

	*off = hdr->read_idx;
	nvgpu_mutex_release(&dev->write_lock);

	return copied;
}

static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
{
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
	nvgpu_mutex_acquire(&dev->write_lock);
	dev->write_enabled = true;
	nvgpu_mutex_release(&dev->write_lock);
	dev->g->ops.fecs_trace.enable(dev->g);
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
{
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
	dev->g->ops.fecs_trace.disable(dev->g);
	nvgpu_mutex_acquire(&dev->write_lock);
	dev->write_enabled = false;
	nvgpu_mutex_release(&dev->write_lock);
	return 0;
}

static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
	size_t size)
{
	struct gk20a *g = dev->g;
	void *buf;
	int err;

	if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
		return -EBUSY;

	err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
	if (err)
		return err;


	dev->hdr = buf;
	dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
	dev->size = size;
	dev->num_ents = dev->hdr->num_ents;

	gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
		dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
	return 0;
}

int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
		void **buf, size_t *size)
{
	struct nvgpu_ctxsw_ring_header *hdr;

	*size = roundup(*size, PAGE_SIZE);
	hdr = vmalloc_user(*size);
	if (!hdr)
		return -ENOMEM;

	hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
	hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
	hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
		/ sizeof(struct nvgpu_ctxsw_trace_entry);
	hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
	hdr->drop_count = 0;
	hdr->read_idx = 0;
	hdr->write_idx = 0;
	hdr->write_seqno = 0;

	*buf = hdr;
	return 0;
}

int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
{
	struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];

	nvgpu_vfree(g, dev->hdr);
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_ring_setup_args *args)
{
	size_t size = args->size;
	int ret;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);

	if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
		return -EINVAL;

	nvgpu_mutex_acquire(&dev->write_lock);
	ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
	nvgpu_mutex_release(&dev->write_lock);

	return ret;
}

static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_trace_filter_args *args)
{
	struct gk20a *g = dev->g;

	nvgpu_mutex_acquire(&dev->write_lock);
	dev->filter = args->filter;
	nvgpu_mutex_release(&dev->write_lock);

	if (g->ops.fecs_trace.set_filter)
		g->ops.fecs_trace.set_filter(g, &dev->filter);
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_trace_filter_args *args)
{
	nvgpu_mutex_acquire(&dev->write_lock);
	args->filter = dev->filter;
	nvgpu_mutex_release(&dev->write_lock);

	return 0;
}

static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
{
	struct gk20a *g = dev->g;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");

	err = gk20a_busy(g);
	if (err)
		return err;

	if (g->ops.fecs_trace.flush)
		err = g->ops.fecs_trace.flush(g);

	if (likely(!err))
		err = g->ops.fecs_trace.poll(g);

	gk20a_idle(g);
	return err;
}

int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
{
	struct nvgpu_os_linux *l;
	struct gk20a *g;
	struct gk20a_ctxsw_trace *trace;
	struct gk20a_ctxsw_dev *dev;
	int err;
	size_t size;
	u32 n;

	/* only one VM for now */
	const int vmid = 0;

	l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
	g = gk20a_get(&l->g);
	if (!g)
		return -ENODEV;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);

	if (!capable(CAP_SYS_ADMIN)) {
		err = -EPERM;
		goto free_ref;
	}

	err = gk20a_busy(g);
	if (err)
		goto free_ref;

	trace = g->ctxsw_trace;
	if (!trace) {
		err = -ENODEV;
		goto idle;
	}

	/* Allow only one user for this device */
	dev = &trace->devs[vmid];
	nvgpu_mutex_acquire(&dev->write_lock);
	if (dev->hdr) {
		err = -EBUSY;
		goto done;
	}

	/* By default, allocate ring buffer big enough to accommodate
	 * FECS records with default event filter */

	/* enable all traces by default */
	NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);

	/* compute max number of entries generated with this filter */
	n = g->ops.fecs_trace.max_entries(g, &dev->filter);

	size = sizeof(struct nvgpu_ctxsw_ring_header) +
		n * sizeof(struct nvgpu_ctxsw_trace_entry);
	gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
		size, n, sizeof(struct nvgpu_ctxsw_trace_entry));

	err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
	if (!err) {
		filp->private_data = dev;
		gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
			filp, dev, size);
	}

done:
	nvgpu_mutex_release(&dev->write_lock);

idle:
	gk20a_idle(g);
free_ref:
	if (err)
		gk20a_put(g);
	return err;
}

int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);

	g->ops.fecs_trace.disable(g);

	nvgpu_mutex_acquire(&dev->write_lock);
	dev->write_enabled = false;
	nvgpu_mutex_release(&dev->write_lock);

	if (dev->hdr) {
		dev->g->ops.fecs_trace.free_user_buffer(dev->g);
		dev->hdr = NULL;
	}
	gk20a_put(g);
	return 0;
}

long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
	unsigned long arg)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
	int err = 0;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
		(_IOC_NR(cmd) == 0) ||
		(_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
		(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
		return -EINVAL;

	memset(buf, 0, sizeof(buf));
	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	switch (cmd) {
	case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
		err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
		break;
	case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
		err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
		break;
	case NVGPU_CTXSW_IOCTL_RING_SETUP:
		err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
			(struct nvgpu_ctxsw_ring_setup_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_SET_FILTER:
		err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
			(struct nvgpu_ctxsw_trace_filter_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_GET_FILTER:
		err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
			(struct nvgpu_ctxsw_trace_filter_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_POLL:
		err = gk20a_ctxsw_dev_ioctl_poll(dev);
		break;
	default:
		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
			cmd);
		err = -ENOTTY;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));

	return err;
}

unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	unsigned int mask = 0;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");

	nvgpu_mutex_acquire(&dev->write_lock);
	poll_wait(filp, &dev->readout_wq.wq, wait);
	if (!ring_is_empty(hdr))
		mask |= POLLIN | POLLRDNORM;
	nvgpu_mutex_release(&dev->write_lock);

	return mask;
}

static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;

	nvgpu_atomic_inc(&dev->vma_ref);
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		nvgpu_atomic_read(&dev->vma_ref));
}

static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;

	nvgpu_atomic_dec(&dev->vma_ref);
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		nvgpu_atomic_read(&dev->vma_ref));
}

static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
	.open = gk20a_ctxsw_dev_vma_open,
	.close = gk20a_ctxsw_dev_vma_close,
};

int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
				struct vm_area_struct *vma)
{
	return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
}

int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	int ret;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
		vma->vm_start, vma->vm_end);

	ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
	if (likely(!ret)) {
		vma->vm_private_data = dev;
		vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
		vma->vm_ops->open(vma);
	}

	return ret;
}

#ifdef CONFIG_GK20A_CTXSW_TRACE
static int gk20a_ctxsw_init_devs(struct gk20a *g)
{
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	struct gk20a_ctxsw_dev *dev = trace->devs;
	int err;
	int i;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		dev->g = g;
		dev->hdr = NULL;
		dev->write_enabled = false;
		nvgpu_cond_init(&dev->readout_wq);
		err = nvgpu_mutex_init(&dev->write_lock);
		if (err)
			return err;
		nvgpu_atomic_set(&dev->vma_ref, 0);
		dev++;
	}
	return 0;
}
#endif

int gk20a_ctxsw_trace_init(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);

	/* if tracing is not supported, skip this */
	if (!g->ops.fecs_trace.init)
		return 0;

	if (likely(trace))
		return 0;

	trace = nvgpu_kzalloc(g, sizeof(*trace));
	if (unlikely(!trace))
		return -ENOMEM;
	g->ctxsw_trace = trace;

	err = gk20a_ctxsw_init_devs(g);
	if (err)
		goto fail;

	err = g->ops.fecs_trace.init(g);
	if (unlikely(err))
		goto fail;

	return 0;

fail:
	memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
	nvgpu_kfree(g, trace);
	g->ctxsw_trace = NULL;
	return err;
#else
	return 0;
#endif
}

void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace;
	struct gk20a_ctxsw_dev *dev;
	int i;

	if (!g->ctxsw_trace)
		return;

	trace = g->ctxsw_trace;
	dev = trace->devs;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		nvgpu_mutex_destroy(&dev->write_lock);
		dev++;
	}

	nvgpu_kfree(g, g->ctxsw_trace);
	g->ctxsw_trace = NULL;

	g->ops.fecs_trace.deinit(g);
#endif
}

int gk20a_ctxsw_trace_write(struct gk20a *g,
		struct nvgpu_ctxsw_trace_entry *entry)
{
	struct nvgpu_ctxsw_ring_header *hdr;
	struct gk20a_ctxsw_dev *dev;
	int ret = 0;
	const char *reason;
	u32 write_idx;

	if (!g->ctxsw_trace)
		return 0;

	if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
		return -ENODEV;

	dev = &g->ctxsw_trace->devs[entry->vmid];
	hdr = dev->hdr;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"dev=%p hdr=%p", dev, hdr);

	nvgpu_mutex_acquire(&dev->write_lock);

	if (unlikely(!hdr)) {
		/* device has been released */
		ret = -ENODEV;
		goto done;
	}

	write_idx = hdr->write_idx;
	if (write_idx >= dev->num_ents) {
		nvgpu_err(dev->g,
			"write_idx=%u out of range [0..%u]",
			write_idx, dev->num_ents);
		ret = -ENOSPC;
		reason = "write_idx out of range";
		goto disable;
	}

	entry->seqno = hdr->write_seqno++;

	if (!dev->write_enabled) {
		ret = -EBUSY;
		reason = "write disabled";
		goto drop;
	}

	if (unlikely(ring_is_full(hdr))) {
		ret = -ENOSPC;
		reason = "user fifo full";
		goto drop;
	}

	if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
		reason = "filtered out";
		goto filter;
	}

	gk20a_dbg(gpu_dbg_ctxsw,
		"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp);

	dev->ents[write_idx] = *entry;

	/* ensure record is written before updating write index */
	nvgpu_smp_wmb();

	write_idx++;
	if (unlikely(write_idx >= hdr->num_ents))
		write_idx = 0;
	hdr->write_idx = write_idx;
	gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
		hdr->read_idx, hdr->write_idx, ring_len(hdr));

	nvgpu_mutex_release(&dev->write_lock);
	return ret;

disable:
	g->ops.fecs_trace.disable(g);

drop:
	hdr->drop_count++;

filter:
	gk20a_dbg(gpu_dbg_ctxsw,
		"dropping seqno=%d context_id=%08x pid=%lld "
		"tag=%x time=%llx (%s)",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp, reason);

done:
	nvgpu_mutex_release(&dev->write_lock);
	return ret;
}

void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
{
	struct gk20a_ctxsw_dev *dev;

	if (!g->ctxsw_trace)
		return;

	dev = &g->ctxsw_trace->devs[vmid];
	nvgpu_cond_signal_interruptible(&dev->readout_wq);
}

void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct nvgpu_ctxsw_trace_entry entry = {
		.vmid = 0,
		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
		.context_id = 0,
		.pid = ch->tgid,
	};

	if (!g->ctxsw_trace)
		return;

	g->ops.bus.read_ptimer(g, &entry.timestamp);
	gk20a_ctxsw_trace_write(g, &entry);
	gk20a_ctxsw_trace_wake_up(g, 0);
#endif
	trace_gk20a_channel_reset(ch->chid, ch->tsgid);
}

void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct nvgpu_ctxsw_trace_entry entry = {
		.vmid = 0,
		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
		.context_id = 0,
		.pid = tsg->tgid,
	};

	if (!g->ctxsw_trace)
		return;

	g->ops.bus.read_ptimer(g, &entry.timestamp);
	gk20a_ctxsw_trace_write(g, &entry);
	gk20a_ctxsw_trace_wake_up(g, 0);
#endif
	trace_gk20a_channel_reset(~0, tsg->tsgid);
}
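
For reference, a minimal userspace sketch of how a client might have driven this interface before the file was removed. The device node path and the installed uapi header name below are assumptions; the NVGPU_CTXSW_IOCTL_* commands and the nvgpu_ctxsw_trace_entry fields are the ones used by the driver above, and open() requires CAP_SYS_ADMIN.

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <linux/nvgpu.h>	/* assumed install path of the nvgpu uapi header */

int main(void)
{
	struct pollfd pfd;
	struct nvgpu_ctxsw_trace_entry entry;
	int i;

	/* Hypothetical device node; the real name depends on the platform. */
	int fd = open("/dev/nvhost-ctxsw-gpu", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* open() already sized the ring for the default (all-enabled) filter,
	 * so enabling tracing is enough to start receiving records. */
	if (ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_ENABLE) < 0) {
		perror("NVGPU_CTXSW_IOCTL_TRACE_ENABLE");
		close(fd);
		return 1;
	}

	for (i = 0; i < 16; i++) {
		pfd.fd = fd;
		pfd.events = POLLIN;

		/* gk20a_ctxsw_dev_poll() reports POLLIN once the ring is non-empty. */
		if (poll(&pfd, 1, 1000) <= 0)
			continue;

		/* gk20a_ctxsw_dev_read() copies out whole entries only. */
		if (read(fd, &entry, sizeof(entry)) == (ssize_t) sizeof(entry))
			printf("seqno=%u tag=0x%x pid=%lld ts=0x%llx\n",
				(unsigned) entry.seqno, (unsigned) entry.tag,
				(long long) entry.pid,
				(unsigned long long) entry.timestamp);
	}

	ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_DISABLE);
	close(fd);
	return 0;
}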