From 1c40d09c4c9c011c1318c328c0b4b6b17d1f537e Mon Sep 17 00:00:00 2001
From: Anton Vorontsov
Date: Wed, 19 Aug 2015 14:27:51 -0700
Subject: gpu: nvgpu: Add support for FECS ctxsw tracing

bug 1648908

This commit adds support for FECS ctxsw tracing. Code is compiled
conditionally under CONFIG_GK20A_CTXSW_TRACE.

This feature requires an updated FECS ucode that writes one record to a
ring buffer on each context switch. On the RM/Kernel side, the GPU
driver reads records from the master ring buffer and generates trace
entries into a user-facing VM ring buffer. For each record in the
master ring buffer, RM/Kernel has to retrieve the vmid+pid of the user
process that submitted the related work.

Features currently implemented:
- master ring buffer allocation
- debugfs to dump master ring buffer
- FECS record per context switch (with both current and new contexts)
- dedicated device for ctxsw tracing (access to VM ring buffer)
- SOF generation (and access to PTIMER)
- VM ring buffer allocation, and reconfiguration
- enable/disable tracing at user level
- event-based trace filtering
- context_ptr to vmid+pid mapping
- read system call for ctxsw dev
- mmap system call for ctxsw dev (direct access to VM ring buffer)
- poll system call for ctxsw dev
- save/restore register on ELPG/CG6
- separate user ring from FECS ring handling

Features requiring ucode changes:
- enable/disable tracing at FECS level
- actual busy time on engine (bug 1642354)
- master ring buffer threshold interrupt (P1)
- API for GPU to CPU timestamp conversion (P1)
- vmid/pid/uid based filtering (P1)

Change-Id: I8e39c648221ee0fa09d5df8524b03dca83fe24f3
Signed-off-by: Thomas Fleury
Reviewed-on: http://git-master/r/1022737
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/Kconfig                     |  10 +
 drivers/gpu/nvgpu/Makefile                    |   4 +-
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c       |   4 +
 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c   | 586 ++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h   |  41 ++
 drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c    | 763 ++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h    |  20 +
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c          |  19 +-
 drivers/gpu/nvgpu/gk20a/gk20a.c               |  49 +-
 drivers/gpu/nvgpu/gk20a/gk20a.h               |  26 +
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c            |   8 +
 drivers/gpu/nvgpu/gk20a/hal_gk20a.c           |   2 +
 drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h | 190 ++++++-
 drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h      |   2 +-
 drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c      |  21 +
 drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h      |  20 +
 drivers/gpu/nvgpu/vgpu/vgpu.c                 |   2 +
 17 files changed, 1756 insertions(+), 11 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
 create mode 100644 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
 create mode 100644 drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
 create mode 100644 drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
 create mode 100644 drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c
 create mode 100644 drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h

diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig
index d0e25aa2..94173976 100644
--- a/drivers/gpu/nvgpu/Kconfig
+++ b/drivers/gpu/nvgpu/Kconfig
@@ -54,6 +54,16 @@ config GK20A_CYCLE_STATS
 	help
 	  Say Y here to enable the cycle stats debugging features.

+config GK20A_CTXSW_TRACE
+	bool "Support GK20A Context Switch tracing"
+	depends on GK20A
+	default n
+	help
+	  Enable support for GK20A Context Switch Tracing. In this mode,
+	  FECS collects timestamps for contexts loaded on GR engine.
This + allows tracking context switches on GR engine, as well as + identifying processes that submitted work. + config TEGRA_GK20A bool "Enable the GK20A GPU on Tegra" depends on TEGRA_GRHOST || TEGRA_HOST1X diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 932dde1a..df660eb7 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -46,6 +46,8 @@ nvgpu-y := \ gk20a/cde_gk20a.o \ gk20a/platform_gk20a_generic.o \ gk20a/tsg_gk20a.o \ + gk20a/ctxsw_trace_gk20a.o \ + gk20a/fecs_trace_gk20a.o \ gk20a/mc_gk20a.o \ gm20b/hal_gm20b.o \ gm20b/ltc_gm20b.o \ @@ -64,7 +66,6 @@ nvgpu-y := \ gm20b/debug_gm20b.o \ gm20b/cde_gm20b.o \ gm20b/therm_gm20b.o - nvgpu-$(CONFIG_TEGRA_GK20A) += gk20a/platform_gk20a_tegra.o nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o @@ -78,6 +79,7 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \ vgpu/debug_vgpu.o \ vgpu/vgpu.o \ vgpu/dbg_vgpu.o \ + vgpu/fecs_trace_vgpu.o \ vgpu/gk20a/vgpu_hal_gk20a.o \ vgpu/gk20a/vgpu_gr_gk20a.o \ vgpu/gm20b/vgpu_hal_gm20b.o \ diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 9b1f2987..0dd1fb8b 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -28,6 +28,7 @@ #include #include "debug_gk20a.h" +#include "ctxsw_trace_gk20a.h" #include "gk20a.h" #include "dbg_gpu_gk20a.h" @@ -920,6 +921,9 @@ static void gk20a_free_channel(struct channel_gk20a *ch) gk20a_free_error_notifiers(ch); + if (g->ops.fecs_trace.unbind_channel) + g->ops.fecs_trace.unbind_channel(g, ch); + /* release channel ctx */ g->ops.gr.free_channel_ctx(ch); diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c new file mode 100644 index 00000000..9e7c04ad --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ctxsw_trace_gk20a.h" +#include "gk20a.h" +#include "gr_gk20a.h" +#include "hw_ctxsw_prog_gk20a.h" +#include "hw_gr_gk20a.h" + +#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE) + +/* Userland-facing FIFO (one global + eventually one per VM) */ +struct gk20a_ctxsw_dev { + struct gk20a *g; + + struct nvgpu_ctxsw_ring_header *hdr; + struct nvgpu_ctxsw_trace_entry *ents; + struct nvgpu_ctxsw_trace_filter filter; + bool write_enabled; + wait_queue_head_t readout_wq; + size_t size; + + atomic_t vma_ref; + + struct mutex lock; +}; + + +struct gk20a_ctxsw_trace { + struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS]; +}; + +static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr) +{ + return (hdr->write_idx == hdr->read_idx); +} + +static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr) +{ + return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx; +} + +static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr) +{ + return (hdr->write_idx - hdr->read_idx) % hdr->num_ents; +} + +static inline int ring_space(struct nvgpu_ctxsw_ring_header *hdr) +{ + return (hdr->read_idx - hdr->write_idx - 1) % hdr->num_ents; +} + +ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, + loff_t *off) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; + struct nvgpu_ctxsw_trace_entry __user *entry = + (struct nvgpu_ctxsw_trace_entry *) buf; + size_t copied = 0; + int err; + + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, + "filp=%p buf=%p size=%zu", filp, buf, size); + + mutex_lock(&dev->lock); + while (ring_is_empty(hdr)) { + mutex_unlock(&dev->lock); + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + err = wait_event_interruptible(dev->readout_wq, + !ring_is_empty(hdr)); + if (err) + return err; + mutex_lock(&dev->lock); + } + + while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) { + if (ring_is_empty(hdr)) + break; + + if (copy_to_user(entry, &dev->ents[hdr->read_idx], + sizeof(*entry))) { + mutex_unlock(&dev->lock); + return -EFAULT; + } + + hdr->read_idx++; + if (hdr->read_idx >= hdr->num_ents) + hdr->read_idx = 0; + + entry++; + copied += sizeof(*entry); + size -= sizeof(*entry); + } + + gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied, + hdr->read_idx); + + *off = hdr->read_idx; + mutex_unlock(&dev->lock); + + return copied; +} + +static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) +{ + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); + dev->write_enabled = true; + return 0; +} + +static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) +{ + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); + dev->write_enabled = false; + return 0; +} + +static int gk20a_ctxsw_dev_ring_alloc(struct gk20a_ctxsw_dev *dev, + size_t size) +{ + struct nvgpu_ctxsw_ring_header *hdr; + + if (atomic_read(&dev->vma_ref)) + return -EBUSY; + + if ((dev->write_enabled) || (atomic_read(&dev->vma_ref))) + return -EBUSY; + + size = roundup(size, PAGE_SIZE); + hdr = vmalloc_user(size); + if (!hdr) + return -ENOMEM; + + if (dev->hdr) + vfree(dev->hdr); + + dev->hdr = hdr; + dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1); + dev->size = size; + + hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC; + hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION; + hdr->num_ents = (size - sizeof(struct nvgpu_ctxsw_ring_header)) + 
/ sizeof(struct nvgpu_ctxsw_trace_entry); + hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry); + hdr->drop_count = 0; + hdr->read_idx = 0; + hdr->write_idx = 0; + hdr->write_seqno = 0; + + gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d", + dev->size, dev->hdr, dev->ents, hdr->num_ents); + return 0; +} + +static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, + struct nvgpu_ctxsw_ring_setup_args *args) +{ + size_t size = args->size; + + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size); + + if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) + return -EINVAL; + + return gk20a_ctxsw_dev_ring_alloc(dev, size); +} + +static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, + struct nvgpu_ctxsw_trace_filter_args *args) +{ + dev->filter = args->filter; + return 0; +} + +static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, + struct nvgpu_ctxsw_trace_filter_args *args) +{ + args->filter = dev->filter; + return 0; +} + +static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev) +{ + struct gk20a *g = dev->g; + int err; + + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); + + err = gk20a_busy(g->dev); + if (err) + return err; + + if (g->ops.fecs_trace.flush(g)) + err = g->ops.fecs_trace.flush(g); + + if (likely(!err)) + err = g->ops.fecs_trace.poll(g); + + gk20a_idle(g->dev); + return err; +} + +int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) +{ + struct gk20a *g; + struct gk20a_ctxsw_trace *trace; + struct gk20a_ctxsw_dev *dev; + int err; + size_t size; + u32 n; + + /* only one VM for now */ + const int vmid = 0; + + g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev); + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + err = gk20a_busy(g->dev); + if (err) + return err; + + trace = g->ctxsw_trace; + if (!trace) { + err = -ENODEV; + goto idle; + } + + /* Allow only one user for this device */ + dev = &trace->devs[vmid]; + mutex_lock(&dev->lock); + if (dev->hdr) { + err = -EBUSY; + goto done; + } + + /* By default, allocate ring buffer big enough to accommodate + * FECS records with default event filter */ + + /* enable all traces by default */ + NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter); + + /* compute max number of entries generated with this filter */ + n = g->ops.fecs_trace.max_entries(g, &dev->filter); + + size = sizeof(struct nvgpu_ctxsw_ring_header) + + n * sizeof(struct nvgpu_ctxsw_trace_entry); + gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu", + size, n, sizeof(struct nvgpu_ctxsw_trace_entry)); + + err = gk20a_ctxsw_dev_ring_alloc(dev, size); + if (!err) { + filp->private_data = dev; + gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu", + filp, dev, size); + } + + err = g->ops.fecs_trace.enable(g); + +done: + mutex_unlock(&dev->lock); + +idle: + gk20a_idle(g->dev); + + return err; +} + +int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct gk20a *g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev); + + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); + + mutex_lock(&dev->lock); + dev->write_enabled = false; + if (dev->hdr) { + vfree(dev->hdr); + dev->hdr = NULL; + } + + g->ops.fecs_trace.disable(g); + + mutex_unlock(&dev->lock); + + return 0; +} + +long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct gk20a *g = dev->g; + u8 
buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; + int err = 0; + + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0) + || (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST)) + return -EINVAL; + + BUG_ON(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE); + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + mutex_lock(&dev->lock); + + switch (cmd) { + case NVGPU_CTXSW_IOCTL_TRACE_ENABLE: + err = gk20a_ctxsw_dev_ioctl_trace_enable(dev); + break; + case NVGPU_CTXSW_IOCTL_TRACE_DISABLE: + err = gk20a_ctxsw_dev_ioctl_trace_disable(dev); + break; + case NVGPU_CTXSW_IOCTL_RING_SETUP: + err = gk20a_ctxsw_dev_ioctl_ring_setup(dev, + (struct nvgpu_ctxsw_ring_setup_args *) buf); + break; + case NVGPU_CTXSW_IOCTL_SET_FILTER: + err = gk20a_ctxsw_dev_ioctl_set_filter(dev, + (struct nvgpu_ctxsw_trace_filter_args *) buf); + break; + case NVGPU_CTXSW_IOCTL_GET_FILTER: + err = gk20a_ctxsw_dev_ioctl_get_filter(dev, + (struct nvgpu_ctxsw_trace_filter_args *) buf); + break; + case NVGPU_CTXSW_IOCTL_POLL: + mutex_unlock(&dev->lock); + err = gk20a_ctxsw_dev_ioctl_poll(dev); + mutex_lock(&dev->lock); + break; + default: + dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", + cmd); + err = -ENOTTY; + } + + mutex_unlock(&dev->lock); + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); + + return err; +} + +unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; + unsigned int mask = 0; + + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); + + mutex_lock(&dev->lock); + poll_wait(filp, &dev->readout_wq, wait); + if (!ring_is_empty(hdr)) + mask |= POLLIN | POLLRDNORM; + mutex_unlock(&dev->lock); + + return mask; +} + +static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma) +{ + struct gk20a_ctxsw_dev *dev = vma->vm_private_data; + + atomic_inc(&dev->vma_ref); + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", + atomic_read(&dev->vma_ref)); +} + +static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma) +{ + struct gk20a_ctxsw_dev *dev = vma->vm_private_data; + + atomic_dec(&dev->vma_ref); + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", + atomic_read(&dev->vma_ref)); +} + +static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { + .open = gk20a_ctxsw_dev_vma_open, + .close = gk20a_ctxsw_dev_vma_close, +}; + +int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + int ret; + + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + + ret = remap_vmalloc_range(vma, dev->hdr, 0); + if (likely(!ret)) { + vma->vm_private_data = dev; + vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; + vma->vm_ops->open(vma); + } + + return ret; +} + +#ifdef CONFIG_GK20A_CTXSW_TRACE +static int gk20a_ctxsw_init_devs(struct gk20a *g) +{ + struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; + struct gk20a_ctxsw_dev *dev = trace->devs; + int i; + + for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { + dev->g = g; + dev->hdr = NULL; + dev->write_enabled = false; + init_waitqueue_head(&dev->readout_wq); + mutex_init(&dev->lock); + atomic_set(&dev->vma_ref, 0); + dev++; + } + return 0; +} +#endif + +int gk20a_ctxsw_trace_init(struct gk20a *g) +{ +#ifdef 
CONFIG_GK20A_CTXSW_TRACE + struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; + int err; + + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace); + + if (likely(trace)) + return 0; + + trace = kzalloc(sizeof(*trace), GFP_KERNEL); + if (unlikely(!trace)) + return -ENOMEM; + g->ctxsw_trace = trace; + + err = gk20a_ctxsw_init_devs(g); + if (err) + goto fail; + + err = g->ops.fecs_trace.init(g); + if (unlikely(err)) + goto fail; + + return 0; + +fail: + kfree(trace); + g->ctxsw_trace = NULL; + return err; +#else + return 0; +#endif +} + +void gk20a_ctxsw_trace_cleanup(struct gk20a *g) +{ +#ifdef CONFIG_GK20A_CTXSW_TRACE + kfree(g->ctxsw_trace); + g->ctxsw_trace = NULL; + + g->ops.fecs_trace.deinit(g); +#endif +} + +int gk20a_ctxsw_trace_write(struct gk20a *g, + struct nvgpu_ctxsw_trace_entry *entry) +{ + struct nvgpu_ctxsw_ring_header *hdr; + struct gk20a_ctxsw_dev *dev; + int ret = 0; + const char *reason; + + if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS)) + return -ENODEV; + + dev = &g->ctxsw_trace->devs[entry->vmid]; + hdr = dev->hdr; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, + "dev=%p hdr=%p", dev, hdr); + + mutex_lock(&dev->lock); + + if (unlikely(!hdr)) { + /* device has been released */ + ret = -ENODEV; + goto done; + } + + entry->seqno = hdr->write_seqno++; + + if (!dev->write_enabled) { + ret = -EBUSY; + reason = "write disabled"; + goto drop; + } + + if (unlikely(ring_is_full(hdr))) { + ret = -ENOSPC; + reason = "user fifo full"; + goto drop; + } + + if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) { + reason = "filtered out"; + goto filter; + } + + gk20a_dbg(gpu_dbg_ctxsw, + "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx", + entry->seqno, entry->context_id, entry->pid, + entry->tag, entry->timestamp); + + dev->ents[hdr->write_idx] = *entry; + + /* ensure record is written before updating write index */ + smp_wmb(); + + hdr->write_idx++; + if (unlikely(hdr->write_idx >= hdr->num_ents)) + hdr->write_idx = 0; + gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d", + hdr->read_idx, hdr->write_idx, ring_len(hdr)); + + mutex_unlock(&dev->lock); + return ret; + +drop: + hdr->drop_count++; + +filter: + gk20a_dbg(gpu_dbg_ctxsw, + "dropping seqno=%d context_id=%08x pid=%lld " + "tag=%x time=%llx (%s)", + entry->seqno, entry->context_id, entry->pid, + entry->tag, entry->timestamp, reason); + +done: + mutex_unlock(&dev->lock); + return ret; +} + +void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid) +{ + struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[vmid]; + + wake_up_interruptible(&dev->readout_wq); +} diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h new file mode 100644 index 00000000..c57d95d1 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
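The ring helpers above and gk20a_ctxsw_trace_write() implement a single-producer ring that keeps one slot open to tell full from empty; ring_is_empty()/ring_is_full() are exact for any num_ents, while the unsigned-modulo shortcut in ring_len()/ring_space() is exact when num_ents divides 2^32 (e.g. a power of two). A stand-alone illustration of the invariant, in plain C with a made-up num_ents:

/*
 * Ring-index arithmetic as used by ring_len()/ring_space(): one slot
 * always stays open, so len + space == num_ents - 1 must hold.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t num_ents = 4, read_idx = 1, write_idx = 3;
	uint32_t len = (write_idx - read_idx) % num_ents;        /* 2 queued */
	uint32_t space = (read_idx - write_idx - 1) % num_ents;  /* 1 free */

	assert(len + space == num_ents - 1);
	assert(((write_idx + 1) % num_ents) != read_idx);        /* not full */
	return 0;
}

The smp_wmb() in gk20a_ctxsw_trace_write() pairs with readers that mmap() the ring and watch write_idx without taking dev->lock: the entry body has to be visible before the index moves.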
+ */ + +#ifndef __CTXSW_TRACE_GK20A_H +#define __CTXSW_TRACE_GK20A_H + +#define GK20A_CTXSW_TRACE_NUM_DEVS 1 + +struct gk20a; +struct nvgpu_ctxsw_trace_entry; +struct channel_gk20a; +struct channel_ctx_gk20a; +struct gk20a_ctxsw_dev; +struct gk20a_fecs_trace; + + +int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp); +int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp); +long gk20a_ctxsw_dev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); +ssize_t gk20a_ctxsw_dev_read(struct file *, char __user *, size_t, loff_t *); +unsigned int gk20a_ctxsw_dev_poll(struct file *, struct poll_table_struct *); +int gk20a_ctxsw_dev_mmap(struct file *, struct vm_area_struct *); + +int gk20a_ctxsw_trace_init(struct gk20a *); +int gk20a_ctxsw_trace_setup(struct gk20a *, void *ctx_ptr); +void gk20a_ctxsw_trace_cleanup(struct gk20a *); +int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *); +void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid); + +#endif /* __CTXSW_TRACE_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c new file mode 100644 index 00000000..bac36403 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -0,0 +1,763 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ctxsw_trace_gk20a.h" +#include "fecs_trace_gk20a.h" +#include "gk20a.h" +#include "gr_gk20a.h" +#include "hw_ctxsw_prog_gk20a.h" +#include "hw_gr_gk20a.h" + +/* + * If HW circular buffer is getting too many "buffer full" conditions, + * increasing this constant should help (it drives Linux' internal buffer size). 
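The constant in question is GK20A_FECS_TRACE_NUM_RECORDS (1 << 6): a master ring of 64 records of 128 bytes each (ctxsw_prog_record_timestamp_record_size_in_bytes_v()), so 8 KiB of GPU-accessible memory. It must stay a power of two, since the poll loop later in this file advances the read index with a mask and counts pending records with CIRC_CNT(). And because the ring is drained every 1/60 s, 64 records also bound the sustainable rate at roughly 64 * 60 = 3840 context switches per second before FECS hits "buffer full". A user-space rendering of that index arithmetic (the macro mirrors linux/circ_buf.h):

/*
 * Why GK20A_FECS_TRACE_NUM_RECORDS must be a power of two: both the
 * mask-advance and CIRC_CNT() silently assume it.
 */
#include <stdio.h>

#define NUM_RECORDS (1 << 6)
#define CIRC_CNT(head, tail, size) (((head) - (tail)) & ((size) - 1))

int main(void)
{
	unsigned int read = 62, write = 2;	/* writer wrapped past the end */

	printf("pending=%u\n", CIRC_CNT(write, read, NUM_RECORDS));	/* 4 */
	read = (read + 1) & (NUM_RECORDS - 1);				/* 63 */
	printf("read after consume=%u\n", read);
	return 0;
}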
+ */ +#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 6) +#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */ +#define GK20A_FECS_TRACE_FRAME_PERIOD_NS (1000000000ULL/60ULL) +#define GK20A_FECS_TRACE_PTIMER_SHIFT 5 + +struct gk20a_fecs_trace_record { + u32 magic_lo; + u32 magic_hi; + u32 context_id; + u32 context_ptr; + u32 new_context_id; + u32 new_context_ptr; + u64 ts[]; +}; + +struct gk20a_fecs_trace_hash_ent { + u32 context_ptr; + pid_t pid; + struct hlist_node node; +}; + +struct gk20a_fecs_trace { + + struct mem_desc trace_buf; + DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS); + struct mutex hash_lock; + struct mutex poll_lock; + u64 sof; + u32 sof_mask; /* did we already send a SOF for this VM */ + + struct task_struct *poll_task; +}; + +#ifdef CONFIG_GK20A_CTXSW_TRACE +static inline u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts) +{ + return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32)); +} + +static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts) +{ + return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32); +} + + +static u32 gk20a_fecs_trace_fecs_context_ptr(struct channel_gk20a *ch) +{ + return (u32) (sg_phys(ch->inst_block.sgt->sgl) >> 12LL); +} + +static inline int gk20a_fecs_trace_num_ts(void) +{ + return (ctxsw_prog_record_timestamp_record_size_in_bytes_v() + - sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64); +} + +struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record( + struct gk20a_fecs_trace *trace, int idx) +{ + return (struct gk20a_fecs_trace_record *) + ((u8 *) trace->trace_buf.cpu_va + + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v())); +} + +static bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r) +{ + /* + * testing magic_hi should suffice. magic_lo is sometimes used + * as a sequence number in experimental ucode. 
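Each record is a fixed 24-byte, six-word header followed by packed u64 timestamp words, so gk20a_fecs_trace_num_ts() works out to (128 - 24) / 8 = 13 timestamp slots per context switch. A compile-time restatement of that arithmetic (the struct mirrors gk20a_fecs_trace_record above; C11 for the assert):

/* 13 u64 timestamp slots fit after the 24-byte fixed part. */
#include <stdint.h>

struct fecs_record {	/* same layout as gk20a_fecs_trace_record */
	uint32_t magic_lo, magic_hi;
	uint32_t context_id, context_ptr;
	uint32_t new_context_id, new_context_ptr;
	uint64_t ts[];
};

#define RECORD_BYTES 0x80	/* ..._record_size_in_bytes_v() */

_Static_assert((RECORD_BYTES - sizeof(struct fecs_record))
		/ sizeof(uint64_t) == 13, "13 ts[] slots per record");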
+ */ + return (r->magic_hi + == ctxsw_prog_record_timestamp_magic_value_hi_v_value_v()); +} + +static int gk20a_fecs_trace_get_read_index(struct gk20a *g) +{ + return gr_gk20a_elpg_protected_call(g, + gk20a_readl(g, gr_fecs_mailbox1_r())); +} + +static int gk20a_fecs_trace_get_write_index(struct gk20a *g) +{ + return gr_gk20a_elpg_protected_call(g, + gk20a_readl(g, gr_fecs_mailbox0_r())); +} + +static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index) +{ + gk20a_dbg(gpu_dbg_ctxsw, "set read=%d", index); + return gr_gk20a_elpg_protected_call(g, + (gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0)); +} + +void gk20a_fecs_trace_hash_dump(struct gk20a *g) +{ + u32 bkt; + struct gk20a_fecs_trace_hash_ent *ent; + struct gk20a_fecs_trace *trace = g->fecs_trace; + + gk20a_dbg(gpu_dbg_ctxsw, "dumping hash table"); + + mutex_lock(&trace->hash_lock); + hash_for_each(trace->pid_hash_table, bkt, ent, node) + { + gk20a_dbg(gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d", + ent, bkt, ent->context_ptr, ent->pid); + + } + mutex_unlock(&trace->hash_lock); +} + +static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid) +{ + struct gk20a_fecs_trace_hash_ent *he; + struct gk20a_fecs_trace *trace = g->fecs_trace; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, + "adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid); + + he = kzalloc(sizeof(*he), GFP_KERNEL); + if (unlikely(!he)) { + gk20a_warn(dev_from_gk20a(g), + "can't alloc new hash entry for context_ptr=%x pid=%d", + context_ptr, pid); + return -ENOMEM; + } + + he->context_ptr = context_ptr; + he->pid = pid; + mutex_lock(&trace->hash_lock); + hash_add(trace->pid_hash_table, &he->node, context_ptr); + mutex_unlock(&trace->hash_lock); + return 0; +} + +static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr) +{ + struct hlist_node *tmp; + struct gk20a_fecs_trace_hash_ent *ent; + struct gk20a_fecs_trace *trace = g->fecs_trace; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, + "freeing hash entry context_ptr=%x", context_ptr); + + mutex_lock(&trace->hash_lock); + hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node, + context_ptr) { + if (ent->context_ptr == context_ptr) { + hash_del(&ent->node); + gk20a_dbg(gpu_dbg_ctxsw, + "freed hash entry=%p context_ptr=%x", ent, + ent->context_ptr); + kfree(ent); + break; + } + } + mutex_unlock(&trace->hash_lock); +} + +static void gk20a_fecs_trace_free_hash_table(struct gk20a *g) +{ + u32 bkt; + struct hlist_node *tmp; + struct gk20a_fecs_trace_hash_ent *ent; + struct gk20a_fecs_trace *trace = g->fecs_trace; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace); + + mutex_lock(&trace->hash_lock); + hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) { + hash_del(&ent->node); + kfree(ent); + } + mutex_unlock(&trace->hash_lock); + +} + +static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr) +{ + struct gk20a_fecs_trace_hash_ent *ent; + struct gk20a_fecs_trace *trace = g->fecs_trace; + pid_t pid = 0; + + mutex_lock(&trace->hash_lock); + hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) { + if (ent->context_ptr == context_ptr) { + gk20a_dbg(gpu_dbg_ctxsw, + "found context_ptr=%x -> pid=%d", + ent->context_ptr, ent->pid); + pid = ent->pid; + break; + } + } + mutex_unlock(&trace->hash_lock); + + return pid; +} + +/* + * Converts HW entry format to userspace-facing format and pushes it to the + * queue. 
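FECS identifies a context only by the physical address of its instance block shifted right by 12 (gk20a_fecs_trace_fecs_context_ptr()), so the driver keeps a hashtable from context_ptr to the submitting pid, filled in when a channel binds and consulted for every record. The lookup side is the stock linux/hashtable.h pattern; a minimal sketch, not the driver code verbatim:

/* context_ptr -> pid lookup, as in gk20a_fecs_trace_find_pid(). */
#include <linux/hashtable.h>
#include <linux/types.h>

static DEFINE_HASHTABLE(pid_map, 8);	/* GK20A_FECS_TRACE_HASH_BITS */

struct pid_ent {
	u32 context_ptr;
	pid_t pid;
	struct hlist_node node;
};

static pid_t pid_map_find(u32 context_ptr)
{
	struct pid_ent *ent;

	hash_for_each_possible(pid_map, ent, node, context_ptr)
		if (ent->context_ptr == context_ptr)
			return ent->pid;
	return 0;	/* 0 == unknown, same convention as the driver */
}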
+ */ +static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index) +{ + int i; + struct nvgpu_ctxsw_trace_entry entry = { }; + struct gk20a_fecs_trace *trace = g->fecs_trace; + pid_t cur_pid; + pid_t new_pid; + + /* for now, only one VM */ + const int vmid = 0; + + struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record( + trace, index); + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, + "consuming record trace=%p read=%d record=%p", trace, index, r); + + if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) { + gk20a_warn(dev_from_gk20a(g), + "trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)", + trace, index, r, r->magic_lo, r->magic_hi); + return -EINVAL; + } + + cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr); + new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr); + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, + "context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)", + r->context_ptr, cur_pid, r->new_context_ptr, new_pid); + + entry.context_id = r->context_id; + entry.vmid = vmid; + + /* insert SOF event if needed */ + if (!(trace->sof_mask & BIT(vmid))) { + entry.tag = NVGPU_CTXSW_TAG_SOF; + entry.timestamp = trace->sof; + entry.context_id = 0; + entry.pid = 0; + + gk20a_dbg(gpu_dbg_ctxsw, "SOF time=%llx", entry.timestamp); + gk20a_ctxsw_trace_write(g, &entry); + trace->sof_mask |= BIT(vmid); + } + + /* break out FECS record into trace events */ + for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) { + + entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]); + entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]); + entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT; + + gk20a_dbg(gpu_dbg_ctxsw, + "tag=%x timestamp=%llx context_id=%08x new_context_id=%08x", + entry.tag, entry.timestamp, r->context_id, + r->new_context_id); + + switch (entry.tag) { + case NVGPU_CTXSW_TAG_RESTORE_START: + case NVGPU_CTXSW_TAG_CONTEXT_START: + entry.context_id = r->new_context_id; + entry.pid = new_pid; + break; + + case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST: + case NVGPU_CTXSW_TAG_FE_ACK: + case NVGPU_CTXSW_TAG_FE_ACK_WFI: + case NVGPU_CTXSW_TAG_FE_ACK_GFXP: + case NVGPU_CTXSW_TAG_FE_ACK_CTAP: + case NVGPU_CTXSW_TAG_FE_ACK_CILP: + case NVGPU_CTXSW_TAG_SAVE_END: + entry.context_id = r->context_id; + entry.pid = cur_pid; + break; + + default: + /* tags are not guaranteed to start at the beginning */ + WARN_ON(entry.tag && (entry.tag != NVGPU_CTXSW_TAG_INVALID_TIMESTAMP)); + continue; + } + + gk20a_dbg(gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld", + entry.tag, entry.context_id, entry.pid); + + if (!entry.context_id) + continue; + + gk20a_ctxsw_trace_write(g, &entry); + } + + gk20a_ctxsw_trace_wake_up(g, vmid); + return 0; +} + +static int gk20a_fecs_trace_poll(struct gk20a *g) +{ + struct gk20a_fecs_trace *trace = g->fecs_trace; + + int read = 0; + int write = 0; + int cnt; + int err; + + err = gk20a_busy(g->dev); + if (unlikely(err)) + return err; + + mutex_lock(&trace->poll_lock); + write = gk20a_fecs_trace_get_write_index(g); + if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) { + gk20a_err(dev_from_gk20a(g), + "failed to acquire write index, write=%d", write); + err = write; + goto done; + } + + read = gk20a_fecs_trace_get_read_index(g); + + cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS); + if (!cnt) + goto done; + + gk20a_dbg(gpu_dbg_ctxsw, + "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d", + read, gk20a_fecs_trace_get_read_index(g), write, cnt); + + /* we did not send any SOF yet */ + trace->sof_mask = 0; + + /* consume all 
records */ + while (read != write) { + gk20a_fecs_trace_ring_read(g, read); + + /* Get to next record. */ + read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); + gk20a_fecs_trace_set_read_index(g, read); + } + +done: + /* + * OK, we read out all the entries... a new "frame" starts here. + * We remember the Start Of Frame time and insert it on the next + * iteration. + */ + trace->sof = gk20a_read_ptimer(g); + + mutex_unlock(&trace->poll_lock); + gk20a_idle(g->dev); + return err; +} + +static int gk20a_fecs_trace_periodic_polling(void *arg) +{ + struct gk20a *g = (struct gk20a *)arg; + struct timespec ts = ns_to_timespec(GK20A_FECS_TRACE_FRAME_PERIOD_NS); + + pr_info("%s: running\n", __func__); + + while (!kthread_should_stop()) { + + hrtimer_nanosleep(&ts, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC); + + gk20a_fecs_trace_poll(g); + } + + return 0; +} + +static int gk20a_fecs_trace_alloc_ring(struct gk20a *g) +{ + struct gk20a_fecs_trace *trace = g->fecs_trace; + + return gk20a_gmmu_alloc(g, GK20A_FECS_TRACE_NUM_RECORDS + * ctxsw_prog_record_timestamp_record_size_in_bytes_v(), + &trace->trace_buf); +} + +static void gk20a_fecs_trace_free_ring(struct gk20a *g) +{ + struct gk20a_fecs_trace *trace = g->fecs_trace; + + gk20a_gmmu_free(g, &trace->trace_buf); +} + +#ifdef CONFIG_DEBUG_FS +/* + * The sequence iterator functions. We simply use the count of the + * next line as our internal position. + */ +static void *gk20a_fecs_trace_debugfs_ring_seq_start( + struct seq_file *s, loff_t *pos) +{ + if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS) + return NULL; + + return pos; +} + +static void *gk20a_fecs_trace_debugfs_ring_seq_next( + struct seq_file *s, void *v, loff_t *pos) +{ + ++(*pos); + if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS) + return NULL; + return pos; +} + +static void gk20a_fecs_trace_debugfs_ring_seq_stop( + struct seq_file *s, void *v) +{ +} + +static int gk20a_fecs_trace_debugfs_ring_seq_show( + struct seq_file *s, void *v) +{ + loff_t *pos = (loff_t *) v; + struct gk20a *g = *(struct gk20a **)s->private; + struct gk20a_fecs_trace *trace = g->fecs_trace; + struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos); + int i; + const u32 invalid_tag = + ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(); + u32 tag; + u64 timestamp; + + seq_printf(s, "record #%lld (%p)\n", *pos, r); + seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo); + seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi); + if (gk20a_fecs_trace_is_valid_record(r)) { + seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr); + seq_printf(s, "\tcontext_id=%08x\n", r->context_id); + seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr); + seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id); + for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) { + tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]); + if (tag == invalid_tag) + continue; + timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]); + timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT; + seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp); + } + } + return 0; +} + +/* + * Tie them all together into a set of seq_operations. + */ +const struct seq_operations gk20a_fecs_trace_debugfs_ring_seq_ops = { + .start = gk20a_fecs_trace_debugfs_ring_seq_start, + .next = gk20a_fecs_trace_debugfs_ring_seq_next, + .stop = gk20a_fecs_trace_debugfs_ring_seq_stop, + .show = gk20a_fecs_trace_debugfs_ring_seq_show +}; + +/* + * Time to set up the file operations for our /proc file. 
In this case, + * all we need is an open function which sets up the sequence ops. + */ + +static int gk20a_ctxsw_debugfs_ring_open(struct inode *inode, + struct file *file) +{ + struct gk20a **p; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + p = __seq_open_private(file, &gk20a_fecs_trace_debugfs_ring_seq_ops, + sizeof(struct gk20a *)); + if (!p) + return -ENOMEM; + + *p = (struct gk20a *)inode->i_private; + return 0; +}; + +/* + * The file operations structure contains our open function along with + * set of the canned seq_ ops. + */ +const struct file_operations gk20a_fecs_trace_debugfs_ring_fops = { + .owner = THIS_MODULE, + .open = gk20a_ctxsw_debugfs_ring_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private +}; + +static int gk20a_fecs_trace_debugfs_read(void *arg, u64 *val) +{ + *val = gk20a_fecs_trace_get_read_index((struct gk20a *)arg); + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_read_fops, + gk20a_fecs_trace_debugfs_read, NULL, "%llu\n"); + +static int gk20a_fecs_trace_debugfs_write(void *arg, u64 *val) +{ + *val = gk20a_fecs_trace_get_write_index((struct gk20a *)arg); + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_write_fops, + gk20a_fecs_trace_debugfs_write, NULL, "%llu\n"); + +static void gk20a_fecs_trace_debugfs_init(struct gk20a *g) +{ + struct gk20a_platform *plat = platform_get_drvdata(g->dev); + + debugfs_create_file("ctxsw_trace_read", 0600, plat->debugfs, g, + &gk20a_fecs_trace_debugfs_read_fops); + debugfs_create_file("ctxsw_trace_write", 0600, plat->debugfs, g, + &gk20a_fecs_trace_debugfs_write_fops); + debugfs_create_file("ctxsw_trace_ring", 0600, plat->debugfs, g, + &gk20a_fecs_trace_debugfs_ring_fops); +} + +static void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g) +{ + struct gk20a_platform *plat = platform_get_drvdata(g->dev); + + debugfs_remove_recursive(plat->debugfs); +} + +#else + +static void gk20a_fecs_trace_debugfs_init(struct gk20a *g) +{ +} + +static inline void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + +static int gk20a_fecs_trace_init(struct gk20a *g) +{ + struct gk20a_fecs_trace *trace; + int err; + + trace = kzalloc(sizeof(struct gk20a_fecs_trace), GFP_KERNEL); + if (!trace) { + gk20a_warn(dev_from_gk20a(g), "failed to allocate fecs_trace"); + return -ENOMEM; + } + g->fecs_trace = trace; + + BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS)); + err = gk20a_fecs_trace_alloc_ring(g); + if (err) { + gk20a_warn(dev_from_gk20a(g), "failed to allocate FECS ring"); + goto clean; + } + + mutex_init(&trace->poll_lock); + mutex_init(&trace->hash_lock); + hash_init(trace->pid_hash_table); + + gk20a_fecs_trace_debugfs_init(g); + return 0; + +clean: + kfree(trace); + g->fecs_trace = NULL; + return err; +} + +static int gk20a_fecs_trace_bind_channel(struct gk20a *g, + struct channel_gk20a *ch) +{ + /* + * map our circ_buf to the context space and store the GPU VA + * in the context header. 
+ */ + + u32 lo; + u32 hi; + phys_addr_t pa; + struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; + struct gk20a_fecs_trace *trace = g->fecs_trace; + void *ctx_ptr; + u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); + + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, + "hw_chid=%d context_ptr=%x inst_block=%llx", + ch->hw_chid, context_ptr, gk20a_mem_phys(&ch->inst_block)); + + if (!trace) + return -ENOMEM; + + pa = gk20a_mem_phys(&trace->trace_buf); + if (!pa) + return -ENOMEM; + + ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, + PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, 0, + pgprot_writecombine(PAGE_KERNEL)); + if (!ctx_ptr) + return -ENOMEM; + + lo = u64_lo32(pa); + hi = u64_hi32(pa); + + gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, + lo, GK20A_FECS_TRACE_NUM_RECORDS); + + gk20a_mem_wr32(ctx_ptr + + ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), + 0, lo); + gk20a_mem_wr32(ctx_ptr + + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), + 0, ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi)); + gk20a_mem_wr32(ctx_ptr + + ctxsw_prog_main_image_context_timestamp_buffer_control_o(), + 0, ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( + GK20A_FECS_TRACE_NUM_RECORDS)); + + vunmap(ctx_ptr); + gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid); + + return 0; +} + +static int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch) +{ + u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); + + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, + "ch=%p context_ptr=%x", ch, context_ptr); + + if (g->ops.fecs_trace.flush) + g->ops.fecs_trace.flush(g); + gk20a_fecs_trace_poll(g); + gk20a_fecs_trace_hash_del(g, context_ptr); + return 0; +} + +static int gk20a_fecs_trace_reset(struct gk20a *g) +{ + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); + + if (g->ops.fecs_trace.flush) + g->ops.fecs_trace.flush(g); + gk20a_fecs_trace_poll(g); + return gk20a_fecs_trace_set_read_index(g, 0); +} + +static int gk20a_fecs_trace_deinit(struct gk20a *g) +{ + struct gk20a_fecs_trace *trace = g->fecs_trace; + + gk20a_fecs_trace_debugfs_cleanup(g); + kthread_stop(trace->poll_task); + gk20a_fecs_trace_free_ring(g); + gk20a_fecs_trace_free_hash_table(g); + + kfree(g->fecs_trace); + g->fecs_trace = NULL; + return 0; +} + +static int gk20a_gr_max_entries(struct gk20a *g, + struct nvgpu_ctxsw_trace_filter *filter) +{ + int n; + int tag; + + /* Compute number of entries per record, with given filter */ + for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++) + n += (NVGPU_CTXSW_FILTER_ISSET(tag, filter) != 0); + + /* Return max number of entries generated for the whole ring */ + return n * GK20A_FECS_TRACE_NUM_RECORDS; +} + +static int gk20a_fecs_trace_enable(struct gk20a *g) +{ + struct gk20a_fecs_trace *trace = g->fecs_trace; + struct task_struct *task; + + if (!trace->poll_task) { + task = kthread_run(gk20a_fecs_trace_periodic_polling, g, __func__); + if (unlikely(IS_ERR(task))) { + gk20a_warn(dev_from_gk20a(g), "failed to create FECS polling task"); + return PTR_ERR(task); + } + trace->poll_task = task; + } + + return 0; +} + +static int gk20a_fecs_trace_disable(struct gk20a *g) +{ + struct gk20a_fecs_trace *trace = g->fecs_trace; + + if (trace->poll_task) { + kthread_stop(trace->poll_task); + trace->poll_task = NULL; + } + + return -EPERM; +} + +void gk20a_init_fecs_trace_ops(struct gpu_ops *ops) +{ + ops->fecs_trace.init = gk20a_fecs_trace_init; + ops->fecs_trace.deinit = gk20a_fecs_trace_deinit; + ops->fecs_trace.enable = gk20a_fecs_trace_enable; + 
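	/*
	 * flush is left NULL just below: per the commit message, an
	 * explicit FECS flush still needs a ucode change, which is why
	 * callers guard every fecs_trace op with a pointer test.
	 */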
ops->fecs_trace.disable = gk20a_fecs_trace_disable; + ops->fecs_trace.reset = gk20a_fecs_trace_reset; + ops->fecs_trace.flush = NULL; + ops->fecs_trace.poll = gk20a_fecs_trace_poll; + ops->fecs_trace.bind_channel = gk20a_fecs_trace_bind_channel; + ops->fecs_trace.unbind_channel = gk20a_fecs_trace_unbind_channel; + ops->fecs_trace.max_entries = gk20a_gr_max_entries; +} +#else +void gk20a_init_fecs_trace_ops(struct gpu_ops *ops) +{ +} +#endif /* CONFIG_GK20A_CTXSW_TRACE */ diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h new file mode 100644 index 00000000..4979d6c6 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __FECS_TRACE_GK20A_H +#define __FECS_TRACE_GK20A_H + +struct gpu_ops; +void gk20a_init_fecs_trace_ops(struct gpu_ops *ops); + +#endif /* __FECS_TRACE_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 661c2c38..029a713f 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -25,6 +25,7 @@ #include "gk20a.h" #include "debug_gk20a.h" +#include "ctxsw_trace_gk20a.h" #include "semaphore_gk20a.h" #include "hw_fifo_gk20a.h" #include "hw_pbdma_gk20a.h" @@ -776,13 +777,17 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id) if (engine_id == top_device_info_type_enum_graphics_v()) { if (support_gk20a_pmu(g->dev) && g->elpg_enabled) gk20a_pmu_disable_elpg(g); - /*HALT_PIPELINE method, halt GR engine*/ - if (gr_gk20a_halt_pipe(g)) - gk20a_err(dev_from_gk20a(g), - "failed to HALT gr pipe"); - /* resetting engine using mc_enable_r() is not - enough, we do full init sequence */ - gk20a_gr_reset(g); + /*HALT_PIPELINE method, halt GR engine*/ + if (gr_gk20a_halt_pipe(g)) + gk20a_err(dev_from_gk20a(g), "failed to HALT gr pipe"); + /* resetting engine will alter read/write index. + * need to flush circular buffer before re-enabling FECS. + */ + if (g->ops.fecs_trace.reset) + g->ops.fecs_trace.reset(g); + /* resetting engine using mc_enable_r() is not + enough, we do full init sequence */ + gk20a_gr_reset(g); if (support_gk20a_pmu(g->dev) && g->elpg_enabled) gk20a_pmu_enable_elpg(g); } diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 0cc9564b..735bf90b 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -60,6 +60,7 @@ #include "hw_gr_gk20a.h" #include "hw_fb_gk20a.h" #include "gk20a_scale.h" +#include "ctxsw_trace_gk20a.h" #include "dbg_gpu_gk20a.h" #include "gk20a_allocator.h" #include "hal.h" @@ -80,7 +81,7 @@ /* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. 
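Note the degradation model: with CONFIG_GK20A_CTXSW_TRACE off, the #else stub above leaves every fecs_trace pointer unset (NULL in a zeroed gpu_ops), and the vGPU build at the end of this patch zeroes the whole table, so call sites such as gk20a_free_channel() and the FIFO engine reset test each pointer before calling. A reduced sketch of that convention (demo types and names are hypothetical):

/* Guarded HAL-op convention: unset ops are simply skipped. */
struct demo_ops {
	int (*flush)(void *g);
};

static int demo_maybe_flush(struct demo_ops *ops, void *g)
{
	return ops->flush ? ops->flush(g) : 0;	/* no-op when unset */
}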
*/ #define INTERFACE_NAME "nvhost%s-gpu" -#define GK20A_NUM_CDEVS 6 +#define GK20A_NUM_CDEVS 7 #define EMC3D_DEFAULT_RATIO 750 @@ -169,6 +170,19 @@ static const struct file_operations gk20a_tsg_ops = { .unlocked_ioctl = gk20a_tsg_dev_ioctl, }; +static const struct file_operations gk20a_ctxsw_ops = { + .owner = THIS_MODULE, + .release = gk20a_ctxsw_dev_release, + .open = gk20a_ctxsw_dev_open, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_ctxsw_dev_ioctl, +#endif + .unlocked_ioctl = gk20a_ctxsw_dev_ioctl, + .poll = gk20a_ctxsw_dev_poll, + .read = gk20a_ctxsw_dev_read, + .mmap = gk20a_ctxsw_dev_mmap, +}; + static inline void sim_writel(struct gk20a *g, u32 r, u32 v) { writel(v, g->sim.regs+r); @@ -881,6 +895,10 @@ static int gk20a_pm_finalize_poweron(struct device *dev) goto done; } + err = gk20a_ctxsw_trace_init(g); + if (err) + gk20a_warn(dev, "could not initialize ctxsw tracing"); + /* Restore the debug setting */ g->ops.mm.set_debug_mode(g, g->mmu_debug_ctrl); @@ -1009,6 +1027,11 @@ void gk20a_user_deinit(struct platform_device *dev) cdev_del(&g->tsg.cdev); } + if (g->ctxsw.node) { + device_destroy(g->class, g->ctxsw.cdev.dev); + cdev_del(&g->ctxsw.cdev); + } + if (g->cdev_region) unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS); @@ -1074,6 +1097,15 @@ int gk20a_user_init(struct platform_device *dev) if (err) goto fail; +#ifdef CONFIG_GK20A_CTXSW_TRACE + err = gk20a_create_device(dev, devno++, "-ctxsw", + &g->ctxsw.cdev, &g->ctxsw.node, + &gk20a_ctxsw_ops); + if (err) + goto fail; +#endif + + return 0; fail: gk20a_user_deinit(dev); @@ -1554,6 +1586,8 @@ static int __exit gk20a_remove(struct platform_device *dev) if (platform->has_cde) gk20a_cde_destroy(g); + gk20a_ctxsw_trace_cleanup(g); + if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) gk20a_scale_exit(dev); @@ -2091,6 +2125,19 @@ gk20a_request_firmware(struct gk20a *g, const char *fw_name) return fw; } + +u64 gk20a_read_ptimer(struct gk20a *g) +{ + u32 time_hi0 = gk20a_readl(g, timer_time_1_r()); + u32 time_lo = gk20a_readl(g, timer_time_0_r()); + u32 time_hi1 = gk20a_readl(g, timer_time_1_r()); + u32 time_hi = (time_lo & (1L << 31)) ? 
time_hi0 : time_hi1; + u64 time = ((u64)time_hi << 32) | time_lo; + + return time; +} + + MODULE_LICENSE("GPL v2"); module_init(gk20a_init); module_exit(gk20a_exit); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 8b87c7aa..541e7b50 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -25,6 +25,8 @@ struct channel_gk20a; struct gr_gk20a; struct sim_gk20a; struct gk20a_ctxsw_ucode_segments; +struct gk20a_fecs_trace; +struct gk20a_ctxsw_trace; struct acr_gm20b; #include @@ -372,6 +374,19 @@ struct gpu_ops { bool (*is_fw_defined)(void); bool use_dma_for_fw_bootstrap; } gr_ctx; + struct { + int (*init)(struct gk20a *g); + int (*max_entries)(struct gk20a *, + struct nvgpu_ctxsw_trace_filter *); + int (*flush)(struct gk20a *g); + int (*poll)(struct gk20a *g); + int (*enable)(struct gk20a *g); + int (*disable)(struct gk20a *g); + int (*reset)(struct gk20a *g); + int (*bind_channel)(struct gk20a *, struct channel_gk20a *); + int (*unbind_channel)(struct gk20a *, struct channel_gk20a *); + int (*deinit)(struct gk20a *g); + } fecs_trace; struct { bool (*support_sparse)(struct gk20a *g); bool (*is_debug_mode_enabled)(struct gk20a *g); @@ -613,6 +628,11 @@ struct gk20a { struct device *node; } tsg; + struct { + struct cdev cdev; + struct device *node; + } ctxsw; + struct mutex client_lock; int client_refcount; /* open channels and ctrl nodes */ @@ -639,6 +659,9 @@ struct gk20a { struct gk20a_scale_profile *scale_profile; + struct gk20a_ctxsw_trace *ctxsw_trace; + struct gk20a_fecs_trace *fecs_trace; + struct device_dma_parameters dma_parms; struct gk20a_cde_app cde_app; @@ -716,6 +739,7 @@ enum gk20a_dbg_categories { gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */ gpu_dbg_cde = BIT(10), /* cde info messages */ gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */ + gpu_dbg_ctxsw = BIT(12), /* ctxsw tracing */ gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ }; @@ -962,4 +986,6 @@ static inline u32 scale_ptimer(u32 timeout , u32 scale10x) else return (timeout * 10) / scale10x; } + +u64 gk20a_read_ptimer(struct gk20a *g); #endif /* GK20A_H */ diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 60bba0b8..08f1d921 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -56,6 +56,7 @@ #include "debug_gk20a.h" #include "semaphore_gk20a.h" #include "platform_gk20a.h" +#include "ctxsw_trace_gk20a.h" #define BLK_SIZE (256) @@ -2855,6 +2856,13 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, "fail to load golden ctx image"); goto out; } + if (g->ops.fecs_trace.bind_channel) { + err = g->ops.fecs_trace.bind_channel(g, c); + if (err) { + gk20a_warn(dev_from_gk20a(g), + "fail to bind channel for ctxsw trace"); + } + } c->first_init = true; } diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index a9ad970a..9718aad2 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c @@ -22,6 +22,7 @@ #include "gk20a_gating_reglist.h" #include "channel_gk20a.h" #include "gr_ctx_gk20a.h" +#include "fecs_trace_gk20a.h" #include "mm_gk20a.h" #include "mc_gk20a.h" #include "pmu_gk20a.h" @@ -57,6 +58,7 @@ int gk20a_init_hal(struct gk20a *g) gk20a_init_mc(gops); gk20a_init_ltc(gops); gk20a_init_gr_ops(gops); + gk20a_init_fecs_trace_ops(gops); gk20a_init_fb(gops); gk20a_init_fifo(gops); gk20a_init_ce2(gops); diff --git a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h 
b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h index 39cbbb58..da555f7c 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2015, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2012-2016, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -246,4 +246,192 @@ static inline u32 ctxsw_prog_main_image_context_id_o(void) { return 0x000000f0; } +static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_control_o(void) +{ + return 0x000000ac; +} +static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(u32 v) +{ + return (v & 0xffff) << 0; +} +static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(void) +{ + return 0x000000b0; +} +static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_v_m(void) +{ + return 0xfffffff << 0; +} +static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_m(void) +{ + return 0x3 << 28; +} +static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f(void) +{ + return 0x0; +} +static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(void) +{ + return 0x20000000; +} +static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(void) +{ + return 0x30000000; +} +static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(void) +{ + return 0x000000b4; +} +static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 ctxsw_prog_record_timestamp_record_size_in_bytes_v(void) +{ + return 0x00000080; +} +static inline u32 ctxsw_prog_record_timestamp_record_size_in_words_v(void) +{ + return 0x00000020; +} +static inline u32 ctxsw_prog_record_timestamp_magic_value_lo_o(void) +{ + return 0x00000000; +} +static inline u32 ctxsw_prog_record_timestamp_magic_value_lo_v_value_v(void) +{ + return 0x00000000; +} +static inline u32 ctxsw_prog_record_timestamp_magic_value_hi_o(void) +{ + return 0x00000004; +} +static inline u32 ctxsw_prog_record_timestamp_magic_value_hi_v_value_v(void) +{ + return 0x600dbeef; +} +static inline u32 ctxsw_prog_record_timestamp_context_id_o(void) +{ + return 0x00000008; +} +static inline u32 ctxsw_prog_record_timestamp_context_ptr_o(void) +{ + return 0x0000000c; +} +static inline u32 ctxsw_prog_record_timestamp_new_context_id_o(void) +{ + return 0x00000010; +} +static inline u32 ctxsw_prog_record_timestamp_new_context_ptr_o(void) +{ + return 0x00000014; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_lo_o(void) +{ + return 0x00000018; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_o(void) +{ + return 0x0000001c; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_v_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_v_v(u32 r) +{ + return (r >> 0) & 0xffffff; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_f(u32 v) +{ + return (v & 0xff) << 24; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_m(void) +{ + return 0xff << 24; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_v(u32 r) +{ + return (r >> 24) & 0xff; +} +static inline u32 
ctxsw_prog_record_timestamp_timestamp_hi_tag_ctxsw_req_by_host_v(void) +{ + return 0x00000001; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_ctxsw_req_by_host_f(void) +{ + return 0x1000000; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_v(void) +{ + return 0x00000002; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_f(void) +{ + return 0x2000000; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_wfi_v(void) +{ + return 0x0000000a; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_wfi_f(void) +{ + return 0xa000000; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_gfxp_v(void) +{ + return 0x0000000b; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_gfxp_f(void) +{ + return 0xb000000; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_ctap_v(void) +{ + return 0x0000000c; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_ctap_f(void) +{ + return 0xc000000; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_cilp_v(void) +{ + return 0x0000000d; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_cilp_f(void) +{ + return 0xd000000; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_save_end_v(void) +{ + return 0x00000003; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_save_end_f(void) +{ + return 0x3000000; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_restore_start_v(void) +{ + return 0x00000004; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_restore_start_f(void) +{ + return 0x4000000; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_context_start_v(void) +{ + return 0x00000005; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_context_start_f(void) +{ + return 0x5000000; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(void) +{ + return 0x000000ff; +} +static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_f(void) +{ + return 0xff000000; +} #endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h index dbbc914f..4cb36cbe 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2013-2016, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, diff --git a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c new file mode 100644 index 00000000..cb955811 --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
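These accessors place the record tag in bits 31:24 of timestamp_hi, i.e. bits 63:56 of the combined u64 word; the remaining 56 bits carry a PTIMER value that the driver widens by GK20A_FECS_TRACE_PTIMER_SHIFT (5). A worked decode matching gk20a_fecs_trace_record_ts_tag_v() and ..._timestamp_v() (the sample value is made up):

/* Decode one FECS u64 timestamp word. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t ts = 0x04000000deadbeefULL;	/* hi=0x04000000 lo=0xdeadbeef */
	unsigned tag = (unsigned)(ts >> 56) & 0xff;	/* 0x04 = restore_start */
	uint64_t t = (ts & ~(0xffULL << 56)) << 5;	/* PTIMER_SHIFT = 5 */

	printf("tag=%02x timestamp=%llx\n", tag, (unsigned long long)t);
	return 0;
}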
+ */
+
+#include
+#include "gk20a/gk20a.h"
+#include "fecs_trace_vgpu.h"
+
+void vgpu_init_fecs_trace_ops(struct gpu_ops *ops)
+{
+	memset(&ops->fecs_trace, 0, sizeof(ops->fecs_trace));
+}
diff --git a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h
new file mode 100644
index 00000000..1aace1fe
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __FECS_TRACE_VGPU_H
+#define __FECS_TRACE_VGPU_H
+
+struct gpu_ops;
+void vgpu_init_fecs_trace_ops(struct gpu_ops *ops);
+
+#endif /* __FECS_TRACE_VGPU_H */
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index 4cc61cb1..57f510ca 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include "vgpu/vgpu.h"
+#include "vgpu/fecs_trace_vgpu.h"
 #include "gk20a/debug_gk20a.h"
 #include "gk20a/hal_gk20a.h"
 #include "gk20a/hw_mc_gk20a.h"
@@ -259,6 +260,7 @@ void vgpu_init_hal_common(struct gk20a *g)
 	vgpu_init_ltc_ops(gops);
 	vgpu_init_mm_ops(gops);
 	vgpu_init_debug_ops(gops);
+	vgpu_init_fecs_trace_ops(gops);
 }

 static int vgpu_init_hal(struct gk20a *g)
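For completeness, here is what a consumer of the new device node might look like. Everything below leans on the uapi header that this patch references but does not add (linux/nvgpu-ctxsw.h is an assumed path; the ioctl names and entry fields come from the driver code above), so treat it as a sketch rather than a supported example:

/*
 * Hypothetical user-space reader for /dev/nvhost-ctxsw-gpu (the name
 * INTERFACE_NAME "nvhost%s-gpu" produces for the "-ctxsw" node).
 * Opening requires CAP_SYS_ADMIN.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvgpu-ctxsw.h>		/* assumed uapi header */

int main(void)
{
	struct nvgpu_ctxsw_trace_entry ent;
	int fd = open("/dev/nvhost-ctxsw-gpu", O_RDONLY);

	if (fd < 0 || ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_ENABLE) < 0)
		return 1;
	while (read(fd, &ent, sizeof(ent)) == (ssize_t)sizeof(ent))
		printf("seq=%u tag=%x pid=%lld ts=%llx\n",
		       (unsigned)ent.seqno, (unsigned)ent.tag,
		       (long long)ent.pid, (unsigned long long)ent.timestamp);
	return 0;
}

Instead of read(), a latency-sensitive client can mmap() the VM ring and walk read_idx/write_idx itself, using poll() to sleep and the NVGPU_CTXSW_IOCTL_POLL ioctl to kick a drain of the FECS side.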