Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c     |   2
 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 727
 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h |  18
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c        |  27
 drivers/gpu/nvgpu/gk20a/gk20a.c             |   2
 drivers/gpu/nvgpu/gk20a/gk20a.h             |  14
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c          |   5
 7 files changed, 48 insertions(+), 747 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 0d011b06..546f4164 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -553,8 +553,10 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	gk20a_dbg_info("freeing bound channel context, timeout=%ld",
 		timeout);
 
+#ifdef CONFIG_GK20A_CTXSW_TRACE
 	if (g->ops.fecs_trace.unbind_channel && !ch->vpr)
 		g->ops.fecs_trace.unbind_channel(g, ch);
+#endif
 
 	/* release channel ctx */
 	g->ops.gr.free_channel_ctx(ch, was_tsg);
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
deleted file mode 100644
index fb33de23..00000000
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ /dev/null
@@ -1,727 +0,0 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <asm/barrier.h>
24#include <linux/wait.h>
25#include <linux/ktime.h>
26#include <linux/uaccess.h>
27#include <linux/poll.h>
28#include <trace/events/gk20a.h>
29#include <uapi/linux/nvgpu.h>
30
31#include <nvgpu/kmem.h>
32
33#include "ctxsw_trace_gk20a.h"
34#include "gk20a.h"
35#include "platform_gk20a.h"
36#include "gr_gk20a.h"
37#include "common/linux/os_linux.h"
38
39#include <nvgpu/log.h>
40#include <nvgpu/atomic.h>
41#include <nvgpu/barrier.h>
42
43#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
44#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
45
46#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE)
47
48/* Userland-facing FIFO (one global + eventually one per VM) */
49struct gk20a_ctxsw_dev {
50 struct gk20a *g;
51
52 struct nvgpu_ctxsw_ring_header *hdr;
53 struct nvgpu_ctxsw_trace_entry *ents;
54 struct nvgpu_ctxsw_trace_filter filter;
55 bool write_enabled;
56 struct nvgpu_cond readout_wq;
57 size_t size;
58 u32 num_ents;
59
60 nvgpu_atomic_t vma_ref;
61
62 struct nvgpu_mutex write_lock;
63};
64
65
66struct gk20a_ctxsw_trace {
67 struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
68};
69
70static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
71{
72 return (hdr->write_idx == hdr->read_idx);
73}
74
75static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
76{
77 return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
78}
79
80static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
81{
82 return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
83}
84
85ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
86 loff_t *off)
87{
88 struct gk20a_ctxsw_dev *dev = filp->private_data;
89 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
90 struct nvgpu_ctxsw_trace_entry __user *entry =
91 (struct nvgpu_ctxsw_trace_entry *) buf;
92 size_t copied = 0;
93 int err;
94
95 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
96 "filp=%p buf=%p size=%zu", filp, buf, size);
97
98 nvgpu_mutex_acquire(&dev->write_lock);
99 while (ring_is_empty(hdr)) {
100 nvgpu_mutex_release(&dev->write_lock);
101 if (filp->f_flags & O_NONBLOCK)
102 return -EAGAIN;
103 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
104 !ring_is_empty(hdr), 0);
105 if (err)
106 return err;
107 nvgpu_mutex_acquire(&dev->write_lock);
108 }
109
110 while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
111 if (ring_is_empty(hdr))
112 break;
113
114 if (copy_to_user(entry, &dev->ents[hdr->read_idx],
115 sizeof(*entry))) {
116 nvgpu_mutex_release(&dev->write_lock);
117 return -EFAULT;
118 }
119
120 hdr->read_idx++;
121 if (hdr->read_idx >= hdr->num_ents)
122 hdr->read_idx = 0;
123
124 entry++;
125 copied += sizeof(*entry);
126 size -= sizeof(*entry);
127 }
128
129 gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
130 hdr->read_idx);
131
132 *off = hdr->read_idx;
133 nvgpu_mutex_release(&dev->write_lock);
134
135 return copied;
136}
137
138static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
139{
140 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
141 nvgpu_mutex_acquire(&dev->write_lock);
142 dev->write_enabled = true;
143 nvgpu_mutex_release(&dev->write_lock);
144 dev->g->ops.fecs_trace.enable(dev->g);
145 return 0;
146}
147
148static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
149{
150 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
151 dev->g->ops.fecs_trace.disable(dev->g);
152 nvgpu_mutex_acquire(&dev->write_lock);
153 dev->write_enabled = false;
154 nvgpu_mutex_release(&dev->write_lock);
155 return 0;
156}
157
158static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
159 size_t size)
160{
161 struct gk20a *g = dev->g;
162 void *buf;
163 int err;
164
165 if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
166 return -EBUSY;
167
168 err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
169 if (err)
170 return err;
171
172
173 dev->hdr = buf;
174 dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
175 dev->size = size;
176 dev->num_ents = dev->hdr->num_ents;
177
178 gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
179 dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
180 return 0;
181}
182
183int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
184 void **buf, size_t *size)
185{
186 struct nvgpu_ctxsw_ring_header *hdr;
187
188 *size = roundup(*size, PAGE_SIZE);
189 hdr = vmalloc_user(*size);
190 if (!hdr)
191 return -ENOMEM;
192
193 hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
194 hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
195 hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
196 / sizeof(struct nvgpu_ctxsw_trace_entry);
197 hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
198 hdr->drop_count = 0;
199 hdr->read_idx = 0;
200 hdr->write_idx = 0;
201 hdr->write_seqno = 0;
202
203 *buf = hdr;
204 return 0;
205}
206
207int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
208{
209 struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
210
211 nvgpu_vfree(g, dev->hdr);
212 return 0;
213}
214
215static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
216 struct nvgpu_ctxsw_ring_setup_args *args)
217{
218 size_t size = args->size;
219 int ret;
220
221 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);
222
223 if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
224 return -EINVAL;
225
226 nvgpu_mutex_acquire(&dev->write_lock);
227 ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
228 nvgpu_mutex_release(&dev->write_lock);
229
230 return ret;
231}
232
233static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
234 struct nvgpu_ctxsw_trace_filter_args *args)
235{
236 struct gk20a *g = dev->g;
237
238 nvgpu_mutex_acquire(&dev->write_lock);
239 dev->filter = args->filter;
240 nvgpu_mutex_release(&dev->write_lock);
241
242 if (g->ops.fecs_trace.set_filter)
243 g->ops.fecs_trace.set_filter(g, &dev->filter);
244 return 0;
245}
246
247static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
248 struct nvgpu_ctxsw_trace_filter_args *args)
249{
250 nvgpu_mutex_acquire(&dev->write_lock);
251 args->filter = dev->filter;
252 nvgpu_mutex_release(&dev->write_lock);
253
254 return 0;
255}
256
257static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
258{
259 struct gk20a *g = dev->g;
260 int err;
261
262 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
263
264 err = gk20a_busy(g);
265 if (err)
266 return err;
267
268 if (g->ops.fecs_trace.flush)
269 err = g->ops.fecs_trace.flush(g);
270
271 if (likely(!err))
272 err = g->ops.fecs_trace.poll(g);
273
274 gk20a_idle(g);
275 return err;
276}
277
278int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
279{
280 struct nvgpu_os_linux *l;
281 struct gk20a *g;
282 struct gk20a_ctxsw_trace *trace;
283 struct gk20a_ctxsw_dev *dev;
284 int err;
285 size_t size;
286 u32 n;
287
288 /* only one VM for now */
289 const int vmid = 0;
290
291 l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
292 g = gk20a_get(&l->g);
293 if (!g)
294 return -ENODEV;
295
296 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
297
298 if (!capable(CAP_SYS_ADMIN)) {
299 err = -EPERM;
300 goto free_ref;
301 }
302
303 err = gk20a_busy(g);
304 if (err)
305 goto free_ref;
306
307 trace = g->ctxsw_trace;
308 if (!trace) {
309 err = -ENODEV;
310 goto idle;
311 }
312
313 /* Allow only one user for this device */
314 dev = &trace->devs[vmid];
315 nvgpu_mutex_acquire(&dev->write_lock);
316 if (dev->hdr) {
317 err = -EBUSY;
318 goto done;
319 }
320
321 /* By default, allocate ring buffer big enough to accommodate
322 * FECS records with default event filter */
323
324 /* enable all traces by default */
325 NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
326
327 /* compute max number of entries generated with this filter */
328 n = g->ops.fecs_trace.max_entries(g, &dev->filter);
329
330 size = sizeof(struct nvgpu_ctxsw_ring_header) +
331 n * sizeof(struct nvgpu_ctxsw_trace_entry);
332 gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
333 size, n, sizeof(struct nvgpu_ctxsw_trace_entry));
334
335 err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
336 if (!err) {
337 filp->private_data = dev;
338 gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
339 filp, dev, size);
340 }
341
342done:
343 nvgpu_mutex_release(&dev->write_lock);
344
345idle:
346 gk20a_idle(g);
347free_ref:
348 if (err)
349 gk20a_put(g);
350 return err;
351}
352
353int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
354{
355 struct gk20a_ctxsw_dev *dev = filp->private_data;
356 struct gk20a *g = dev->g;
357
358 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
359
360 g->ops.fecs_trace.disable(g);
361
362 nvgpu_mutex_acquire(&dev->write_lock);
363 dev->write_enabled = false;
364 nvgpu_mutex_release(&dev->write_lock);
365
366 if (dev->hdr) {
367 dev->g->ops.fecs_trace.free_user_buffer(dev->g);
368 dev->hdr = NULL;
369 }
370 gk20a_put(g);
371 return 0;
372}
373
374long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
375 unsigned long arg)
376{
377 struct gk20a_ctxsw_dev *dev = filp->private_data;
378 struct gk20a *g = dev->g;
379 u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
380 int err = 0;
381
382 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
383
384 if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
385 (_IOC_NR(cmd) == 0) ||
386 (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
387 (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
388 return -EINVAL;
389
390 memset(buf, 0, sizeof(buf));
391 if (_IOC_DIR(cmd) & _IOC_WRITE) {
392 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
393 return -EFAULT;
394 }
395
396 switch (cmd) {
397 case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
398 err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
399 break;
400 case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
401 err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
402 break;
403 case NVGPU_CTXSW_IOCTL_RING_SETUP:
404 err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
405 (struct nvgpu_ctxsw_ring_setup_args *) buf);
406 break;
407 case NVGPU_CTXSW_IOCTL_SET_FILTER:
408 err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
409 (struct nvgpu_ctxsw_trace_filter_args *) buf);
410 break;
411 case NVGPU_CTXSW_IOCTL_GET_FILTER:
412 err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
413 (struct nvgpu_ctxsw_trace_filter_args *) buf);
414 break;
415 case NVGPU_CTXSW_IOCTL_POLL:
416 err = gk20a_ctxsw_dev_ioctl_poll(dev);
417 break;
418 default:
419 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
420 cmd);
421 err = -ENOTTY;
422 }
423
424 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
425 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
426
427 return err;
428}
429
430unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
431{
432 struct gk20a_ctxsw_dev *dev = filp->private_data;
433 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
434 unsigned int mask = 0;
435
436 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
437
438 nvgpu_mutex_acquire(&dev->write_lock);
439 poll_wait(filp, &dev->readout_wq.wq, wait);
440 if (!ring_is_empty(hdr))
441 mask |= POLLIN | POLLRDNORM;
442 nvgpu_mutex_release(&dev->write_lock);
443
444 return mask;
445}
446
447static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
448{
449 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
450
451 nvgpu_atomic_inc(&dev->vma_ref);
452 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
453 nvgpu_atomic_read(&dev->vma_ref));
454}
455
456static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
457{
458 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
459
460 nvgpu_atomic_dec(&dev->vma_ref);
461 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
462 nvgpu_atomic_read(&dev->vma_ref));
463}
464
465static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
466 .open = gk20a_ctxsw_dev_vma_open,
467 .close = gk20a_ctxsw_dev_vma_close,
468};
469
470int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
471 struct vm_area_struct *vma)
472{
473 return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
474}
475
476int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
477{
478 struct gk20a_ctxsw_dev *dev = filp->private_data;
479 int ret;
480
481 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
482 vma->vm_start, vma->vm_end);
483
484 ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
485 if (likely(!ret)) {
486 vma->vm_private_data = dev;
487 vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
488 vma->vm_ops->open(vma);
489 }
490
491 return ret;
492}
493
494#ifdef CONFIG_GK20A_CTXSW_TRACE
495static int gk20a_ctxsw_init_devs(struct gk20a *g)
496{
497 struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
498 struct gk20a_ctxsw_dev *dev = trace->devs;
499 int err;
500 int i;
501
502 for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
503 dev->g = g;
504 dev->hdr = NULL;
505 dev->write_enabled = false;
506 nvgpu_cond_init(&dev->readout_wq);
507 err = nvgpu_mutex_init(&dev->write_lock);
508 if (err)
509 return err;
510 nvgpu_atomic_set(&dev->vma_ref, 0);
511 dev++;
512 }
513 return 0;
514}
515#endif
516
517int gk20a_ctxsw_trace_init(struct gk20a *g)
518{
519#ifdef CONFIG_GK20A_CTXSW_TRACE
520 struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
521 int err;
522
523 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);
524
525 /* if tracing is not supported, skip this */
526 if (!g->ops.fecs_trace.init)
527 return 0;
528
529 if (likely(trace))
530 return 0;
531
532 trace = nvgpu_kzalloc(g, sizeof(*trace));
533 if (unlikely(!trace))
534 return -ENOMEM;
535 g->ctxsw_trace = trace;
536
537 err = gk20a_ctxsw_init_devs(g);
538 if (err)
539 goto fail;
540
541 err = g->ops.fecs_trace.init(g);
542 if (unlikely(err))
543 goto fail;
544
545 return 0;
546
547fail:
548 memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
549 nvgpu_kfree(g, trace);
550 g->ctxsw_trace = NULL;
551 return err;
552#else
553 return 0;
554#endif
555}
556
557void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
558{
559#ifdef CONFIG_GK20A_CTXSW_TRACE
560 struct gk20a_ctxsw_trace *trace;
561 struct gk20a_ctxsw_dev *dev;
562 int i;
563
564 if (!g->ctxsw_trace)
565 return;
566
567 trace = g->ctxsw_trace;
568 dev = trace->devs;
569
570 for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
571 nvgpu_mutex_destroy(&dev->write_lock);
572 dev++;
573 }
574
575 nvgpu_kfree(g, g->ctxsw_trace);
576 g->ctxsw_trace = NULL;
577
578 g->ops.fecs_trace.deinit(g);
579#endif
580}
581
582int gk20a_ctxsw_trace_write(struct gk20a *g,
583 struct nvgpu_ctxsw_trace_entry *entry)
584{
585 struct nvgpu_ctxsw_ring_header *hdr;
586 struct gk20a_ctxsw_dev *dev;
587 int ret = 0;
588 const char *reason;
589 u32 write_idx;
590
591 if (!g->ctxsw_trace)
592 return 0;
593
594 if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
595 return -ENODEV;
596
597 dev = &g->ctxsw_trace->devs[entry->vmid];
598 hdr = dev->hdr;
599
600 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
601 "dev=%p hdr=%p", dev, hdr);
602
603 nvgpu_mutex_acquire(&dev->write_lock);
604
605 if (unlikely(!hdr)) {
606 /* device has been released */
607 ret = -ENODEV;
608 goto done;
609 }
610
611 write_idx = hdr->write_idx;
612 if (write_idx >= dev->num_ents) {
613 nvgpu_err(dev->g,
614 "write_idx=%u out of range [0..%u]",
615 write_idx, dev->num_ents);
616 ret = -ENOSPC;
617 reason = "write_idx out of range";
618 goto disable;
619 }
620
621 entry->seqno = hdr->write_seqno++;
622
623 if (!dev->write_enabled) {
624 ret = -EBUSY;
625 reason = "write disabled";
626 goto drop;
627 }
628
629 if (unlikely(ring_is_full(hdr))) {
630 ret = -ENOSPC;
631 reason = "user fifo full";
632 goto drop;
633 }
634
635 if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
636 reason = "filtered out";
637 goto filter;
638 }
639
640 gk20a_dbg(gpu_dbg_ctxsw,
641 "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
642 entry->seqno, entry->context_id, entry->pid,
643 entry->tag, entry->timestamp);
644
645 dev->ents[write_idx] = *entry;
646
647 /* ensure record is written before updating write index */
648 nvgpu_smp_wmb();
649
650 write_idx++;
651 if (unlikely(write_idx >= hdr->num_ents))
652 write_idx = 0;
653 hdr->write_idx = write_idx;
654 gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
655 hdr->read_idx, hdr->write_idx, ring_len(hdr));
656
657 nvgpu_mutex_release(&dev->write_lock);
658 return ret;
659
660disable:
661 g->ops.fecs_trace.disable(g);
662
663drop:
664 hdr->drop_count++;
665
666filter:
667 gk20a_dbg(gpu_dbg_ctxsw,
668 "dropping seqno=%d context_id=%08x pid=%lld "
669 "tag=%x time=%llx (%s)",
670 entry->seqno, entry->context_id, entry->pid,
671 entry->tag, entry->timestamp, reason);
672
673done:
674 nvgpu_mutex_release(&dev->write_lock);
675 return ret;
676}
677
678void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
679{
680 struct gk20a_ctxsw_dev *dev;
681
682 if (!g->ctxsw_trace)
683 return;
684
685 dev = &g->ctxsw_trace->devs[vmid];
686 nvgpu_cond_signal_interruptible(&dev->readout_wq);
687}
688
689void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
690{
691#ifdef CONFIG_GK20A_CTXSW_TRACE
692 struct nvgpu_ctxsw_trace_entry entry = {
693 .vmid = 0,
694 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
695 .context_id = 0,
696 .pid = ch->tgid,
697 };
698
699 if (!g->ctxsw_trace)
700 return;
701
702 g->ops.bus.read_ptimer(g, &entry.timestamp);
703 gk20a_ctxsw_trace_write(g, &entry);
704 gk20a_ctxsw_trace_wake_up(g, 0);
705#endif
706 trace_gk20a_channel_reset(ch->chid, ch->tsgid);
707}
708
709void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
710{
711#ifdef CONFIG_GK20A_CTXSW_TRACE
712 struct nvgpu_ctxsw_trace_entry entry = {
713 .vmid = 0,
714 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
715 .context_id = 0,
716 .pid = tsg->tgid,
717 };
718
719 if (!g->ctxsw_trace)
720 return;
721
722 g->ops.bus.read_ptimer(g, &entry.timestamp);
723 gk20a_ctxsw_trace_write(g, &entry);
724 gk20a_ctxsw_trace_wake_up(g, 0);
725#endif
726 trace_gk20a_channel_reset(~0, tsg->tsgid);
727}
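
The file removed above implemented the userland-facing ctxsw trace FIFO: a char device whose ioctls (NVGPU_CTXSW_IOCTL_TRACE_ENABLE/DISABLE, RING_SETUP, SET_FILTER, GET_FILTER, POLL) arm FECS tracing and whose read() drains struct nvgpu_ctxsw_trace_entry records from the shared ring. As a rough illustration of how a consumer drives that interface, here is a minimal userspace sketch; the device node path is hypothetical, and the ioctl numbers and entry layout are assumed to come from the nvgpu UAPI header (included in the removed file as <uapi/linux/nvgpu.h>, here referenced by its installed name).

/*
 * Hedged sketch of a ctxsw trace consumer. "/dev/nvhost-ctxsw-gpu" is a
 * hypothetical node name; all NVGPU_CTXSW_* symbols and the entry struct
 * are assumed to be provided by the nvgpu UAPI header.
 */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed installed name of the UAPI header */

int main(void)
{
	struct nvgpu_ctxsw_trace_entry ents[64];
	struct pollfd pfd;
	ssize_t n;
	int fd, i, loops;

	fd = open("/dev/nvhost-ctxsw-gpu", O_RDONLY);	/* hypothetical path */
	if (fd < 0)
		return 1;

	/* The ring is allocated with a default size at open(); this arms writes. */
	if (ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_ENABLE)) {
		close(fd);
		return 1;
	}

	pfd.fd = fd;
	pfd.events = POLLIN;

	for (loops = 0; loops < 10; loops++) {
		/* Ask the driver to flush pending FECS records into the ring. */
		ioctl(fd, NVGPU_CTXSW_IOCTL_POLL);

		if (poll(&pfd, 1, 1000) <= 0)
			continue;

		/* read() copies out whole nvgpu_ctxsw_trace_entry records. */
		n = read(fd, ents, sizeof(ents));
		if (n <= 0)
			break;
		for (i = 0; i < (int)(n / sizeof(ents[0])); i++)
			printf("seq=%u pid=%lld tag=%#x ts=%#llx\n",
			       (unsigned)ents[i].seqno,
			       (long long)ents[i].pid,
			       (unsigned)ents[i].tag,
			       (unsigned long long)ents[i].timestamp);
	}

	ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_DISABLE);
	close(fd);
	return 0;
}

Reads block while the ring is empty unless the file is opened with O_NONBLOCK, matching the -EAGAIN path in gk20a_ctxsw_dev_read() above; the ring can also be mapped directly via mmap(), which is what the vma_open/vma_close reference counting supports.
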
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
index b270581b..dddb8603 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
@@ -23,6 +23,8 @@
 #ifndef __CTXSW_TRACE_GK20A_H
 #define __CTXSW_TRACE_GK20A_H
 
+#include <nvgpu/types.h>
+
 #define GK20A_CTXSW_TRACE_NUM_DEVS 1
 
 struct file;
@@ -41,20 +43,22 @@ int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp);
 int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp);
 long gk20a_ctxsw_dev_ioctl(struct file *filp,
 	unsigned int cmd, unsigned long arg);
-ssize_t gk20a_ctxsw_dev_read(struct file *, char __user *, size_t, loff_t *);
-unsigned int gk20a_ctxsw_dev_poll(struct file *, struct poll_table_struct *);
-int gk20a_ctxsw_dev_mmap(struct file *, struct vm_area_struct *);
+ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf,
+	size_t size, loff_t *offs);
+unsigned int gk20a_ctxsw_dev_poll(struct file *filp,
+	struct poll_table_struct *pts);
+int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma);
 int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, void **buf, size_t *size);
 int gk20a_ctxsw_dev_ring_free(struct gk20a *g);
 int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, struct vm_area_struct *vma);
 
-int gk20a_ctxsw_trace_init(struct gk20a *);
-void gk20a_ctxsw_trace_cleanup(struct gk20a *);
-int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *);
+int gk20a_ctxsw_trace_init(struct gk20a *g);
+void gk20a_ctxsw_trace_cleanup(struct gk20a *g);
+int gk20a_ctxsw_trace_write(struct gk20a *g,
+	struct nvgpu_ctxsw_trace_entry *entry);
 void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid);
 
 void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch);
 void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg);
 
-
 #endif /* __CTXSW_TRACE_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index d10af9e9..17ae626b 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1228,16 +1228,24 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
 		if (nvgpu_pmu_disable_elpg(g))
 			nvgpu_err(g, "failed to set disable elpg");
 	}
-	/* resetting engine will alter read/write index.
-	 * need to flush circular buffer before re-enabling FECS.
+
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+	/*
+	 * Resetting engine will alter read/write index. Need to flush
+	 * circular buffer before re-enabling FECS.
 	 */
 	if (g->ops.fecs_trace.reset)
 		g->ops.fecs_trace.reset(g);
-	/*HALT_PIPELINE method, halt GR engine*/
+#endif
+
+	/* HALT_PIPELINE method, halt GR engine. */
 	if (gr_gk20a_halt_pipe(g))
 		nvgpu_err(g, "failed to HALT gr pipe");
-	/* resetting engine using mc_enable_r() is not
-	   enough, we do full init sequence */
+
+	/*
+	 * Resetting engine using mc_enable_r() is not enough; we must
+	 * do full init sequence.
+	 */
 	gk20a_gr_reset(g);
 	if (g->support_pmu && g->can_elpg)
 		nvgpu_pmu_enable_elpg(g);
@@ -1618,6 +1626,8 @@ static bool gk20a_fifo_handle_mmu_fault(
 			}
 		}
 	}
+
+#ifdef CONFIG_GK20A_CTXSW_TRACE
 	/*
 	 * For non fake mmu fault, both tsg and ch pointers
 	 * could be valid. Check tsg first.
@@ -1626,10 +1636,11 @@ static bool gk20a_fifo_handle_mmu_fault(
 		gk20a_ctxsw_trace_tsg_reset(g, tsg);
 	else if (ch)
 		gk20a_ctxsw_trace_channel_reset(g, ch);
+#endif
 
-	/* disable the channel/TSG from hw and increment
-	 * syncpoints */
-
+	/*
+	 * Disable the channel/TSG from hw and increment syncpoints.
+	 */
 	if (tsg) {
 		if (!g->fifo.deferred_reset_pending) {
 			if (!fake_fault)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 47f6c56c..703a7c0c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -295,9 +295,11 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		goto done;
 	}
 
+#ifdef CONFIG_GK20A_CTXSW_TRACE
 	err = gk20a_ctxsw_trace_init(g);
 	if (err)
 		nvgpu_warn(g, "could not initialize ctxsw tracing");
+#endif
 
 	err = gk20a_sched_ctrl_init(g);
 	if (err) {
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d7fdffb0..a34f06b2 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -689,18 +689,25 @@ struct gpu_ops {
 		int (*get_netlist_name)(struct gk20a *g, int index, char *name);
 		bool (*is_fw_defined)(void);
 	} gr_ctx;
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+	/*
+	 * Currently only supported on Linux due to the extremely tight
+	 * integration with Linux device driver structure (in particular
+	 * mmap).
+	 */
 	struct {
 		int (*init)(struct gk20a *g);
 		int (*max_entries)(struct gk20a *,
-			struct nvgpu_ctxsw_trace_filter *);
+			struct nvgpu_ctxsw_trace_filter *filter);
 		int (*flush)(struct gk20a *g);
 		int (*poll)(struct gk20a *g);
 		int (*enable)(struct gk20a *g);
 		int (*disable)(struct gk20a *g);
 		bool (*is_enabled)(struct gk20a *g);
 		int (*reset)(struct gk20a *g);
-		int (*bind_channel)(struct gk20a *, struct channel_gk20a *);
-		int (*unbind_channel)(struct gk20a *, struct channel_gk20a *);
+		int (*bind_channel)(struct gk20a *g, struct channel_gk20a *ch);
+		int (*unbind_channel)(struct gk20a *g,
+				struct channel_gk20a *ch);
 		int (*deinit)(struct gk20a *g);
 		int (*alloc_user_buffer)(struct gk20a *g,
 			void **buf, size_t *size);
@@ -710,6 +717,7 @@ struct gpu_ops {
 		int (*set_filter)(struct gk20a *g,
 			struct nvgpu_ctxsw_trace_filter *filter);
 	} fecs_trace;
+#endif
 	struct {
 		bool (*support_sparse)(struct gk20a *g);
 		u64 (*gmmu_map)(struct vm_gk20a *vm,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 1ea59a9d..f78d862c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3070,13 +3070,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 				"fail to load golden ctx image");
 			goto out;
 		}
+#ifdef CONFIG_GK20A_CTXSW_TRACE
 		if (g->ops.fecs_trace.bind_channel && !c->vpr) {
 			err = g->ops.fecs_trace.bind_channel(g, c);
-			if (err) {
+			if (err)
 				nvgpu_warn(g,
 					"fail to bind channel for ctxsw trace");
-			}
 		}
+#endif
 		c->first_init = true;
 	}
 
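
Taken together, the change compiles the fecs_trace HAL and every call site out when CONFIG_GK20A_CTXSW_TRACE is not set. A common alternative idiom, sketched below purely for comparison and not taken from the nvgpu sources, is to keep call sites unconditional and provide no-op static inline stubs in the header when the option is disabled; the #ifdef-at-call-site approach used here instead keeps the ops struct itself out of gpu_ops for non-Linux builds.

/*
 * Illustrative idiom only: config-gated stubs keep callers free of #ifdefs.
 * The declarations mirror the nvgpu API above; the stub variants are a
 * hypothetical alternative, not part of this change.
 */
struct gk20a;

#ifdef CONFIG_GK20A_CTXSW_TRACE
int gk20a_ctxsw_trace_init(struct gk20a *g);
void gk20a_ctxsw_trace_cleanup(struct gk20a *g);
#else
static inline int gk20a_ctxsw_trace_init(struct gk20a *g)
{
	return 0;	/* tracing not built in; report success */
}
static inline void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
}
#endif
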