From 60b715e85600a6be283e54c610c2a3db3b552059 Mon Sep 17 00:00:00 2001
From: Richard Zhao
Date: Mon, 7 Mar 2016 14:23:12 -0800
Subject: gpu: nvgpu: vgpu: add fecs trace support

Bug 1648908

Change-Id: I7901e7bce5f7aa124a188101dd0736241d87bd53
Signed-off-by: Richard Zhao
Reviewed-on: http://git-master/r/1031861
Reviewed-on: http://git-master/r/1121261
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Thomas Fleury
Reviewed-by: Aingara Paramakuru
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
Review amendments (content differs from the commit id above):
 - vgpu_fecs_trace_init(): return -EINVAL on entry size mismatch instead of 0,
   check the ioremap_cache() result, and unwind with staged labels so an
   ERR_PTR cookie is never passed to tegra_hv_mempool_unreserve().
 - gk20a_ctxsw_dev_alloc_buffer(): free the previous ring once the new one is
   allocated, so repeated RING_SETUP ioctls do not leak it.
 - add short comments on the newly introduced functions.

 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 100 ++++++++++----
 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h |   2 +
 drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c  |   1 +
 drivers/gpu/nvgpu/gk20a/gk20a.h             |   7 +
 drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c    | 220 +++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h    |   1 +
 drivers/gpu/nvgpu/vgpu/vgpu.c               |   8 ++
 7 files changed, 313 insertions(+), 26 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index 9e7c04ad..6a61d744 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -130,42 +130,64 @@ static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
 {
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
 	dev->write_enabled = true;
+	dev->g->ops.fecs_trace.enable(dev->g);
 	return 0;
 }
 
 static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
 {
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
+	dev->g->ops.fecs_trace.disable(dev->g);
 	dev->write_enabled = false;
 	return 0;
 }
 
-static int gk20a_ctxsw_dev_ring_alloc(struct gk20a_ctxsw_dev *dev,
-		size_t size)
+/*
+ * Obtain the user-visible trace ring from the fecs_trace HAL and point
+ * dev->hdr/ents/size at it. Returns -EBUSY while tracing is enabled or the
+ * ring is mmap'ed (vma_ref != 0), else the HAL's allocation result.
+ */
+static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
+					size_t size)
 {
-	struct nvgpu_ctxsw_ring_header *hdr;
-
-	if (atomic_read(&dev->vma_ref))
-		return -EBUSY;
+	struct gk20a *g = dev->g;
+	void *buf;
+	int err;
 
 	if ((dev->write_enabled) || (atomic_read(&dev->vma_ref)))
 		return -EBUSY;
 
-	size = roundup(size, PAGE_SIZE);
-	hdr = vmalloc_user(size);
-	if (!hdr)
-		return -ENOMEM;
+	err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
+	if (err)
+		return err;
 
+	/* release the ring installed by open() or an earlier RING_SETUP */
 	if (dev->hdr)
-		vfree(dev->hdr);
+		g->ops.fecs_trace.free_user_buffer(g);
 
-	dev->hdr = hdr;
+	dev->hdr = buf;
 	dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
 	dev->size = size;
 
+	gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
+		dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
+	return 0;
+}
+
+/* Native HAL: vmalloc_user() a page-rounded ring and initialize its header. */
+static int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
+					void **buf, size_t *size)
+{
+	struct nvgpu_ctxsw_ring_header *hdr;
+
+	*size = roundup(*size, PAGE_SIZE);
+	hdr = vmalloc_user(*size);
+	if (!hdr)
+		return -ENOMEM;
+
 	hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
 	hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
-	hdr->num_ents = (size - sizeof(struct nvgpu_ctxsw_ring_header))
+	hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
 		/ sizeof(struct nvgpu_ctxsw_trace_entry);
 	hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
 	hdr->drop_count = 0;
@@ -173,8 +195,16 @@ static int gk20a_ctxsw_dev_ring_alloc(struct gk20a_ctxsw_dev *dev,
 	hdr->write_idx = 0;
 	hdr->write_seqno = 0;
 
-	gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
-		dev->size, dev->hdr, dev->ents, hdr->num_ents);
+	*buf = hdr;
+	return 0;
+}
+
+/* Native HAL: vfree() the ring currently held by trace device 0. */
+static int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
+{
+	struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
+
+	vfree(dev->hdr);
 	return 0;
 }
 
@@ -188,13 +218,17 @@ static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
 	if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
 		return -EINVAL;
 
-	return gk20a_ctxsw_dev_ring_alloc(dev, size);
+	return gk20a_ctxsw_dev_alloc_buffer(dev, size);
 }
 
 static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
 	struct nvgpu_ctxsw_trace_filter_args *args)
 {
+	struct gk20a *g = dev->g;
+
 	dev->filter = args->filter;
+	if (g->ops.fecs_trace.set_filter)
+		g->ops.fecs_trace.set_filter(g, &dev->filter);
 	return 0;
 }
 
@@ -276,15 +310,13 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
 	gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
 		size, n, sizeof(struct nvgpu_ctxsw_trace_entry));
 
-	err = gk20a_ctxsw_dev_ring_alloc(dev, size);
+	err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
 	if (!err) {
 		filp->private_data = dev;
 		gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
 			filp, dev, size);
 	}
 
-	err = g->ops.fecs_trace.enable(g);
-
 done:
 	mutex_unlock(&dev->lock);
 
@@ -297,19 +329,18 @@ idle:
 int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
 {
 	struct gk20a_ctxsw_dev *dev = filp->private_data;
-	struct gk20a *g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev);
 
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
 
 	mutex_lock(&dev->lock);
-	dev->write_enabled = false;
+	if (dev->write_enabled)
+		gk20a_ctxsw_dev_ioctl_trace_disable(dev);
+
 	if (dev->hdr) {
-		vfree(dev->hdr);
+		dev->g->ops.fecs_trace.free_user_buffer(dev->g);
 		dev->hdr = NULL;
 	}
 
-	g->ops.fecs_trace.disable(g);
-
 	mutex_unlock(&dev->lock);
 
 	return 0;
@@ -417,6 +448,13 @@ static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
 	.close = gk20a_ctxsw_dev_vma_close,
 };
 
+/* Native HAL: map the vmalloc'ed ring of trace device 0 into @vma. */
+static int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
+				struct vm_area_struct *vma)
+{
+	return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
+}
+
 int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct gk20a_ctxsw_dev *dev = filp->private_data;
@@ -425,7 +463,7 @@ int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
 		vma->vm_start, vma->vm_end);
 
-	ret = remap_vmalloc_range(vma, dev->hdr, 0);
+	ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
 	if (likely(!ret)) {
 		vma->vm_private_data = dev;
 		vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
@@ -482,6 +520,7 @@ int gk20a_ctxsw_trace_init(struct gk20a *g)
 	return 0;
 
 fail:
+	memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
 	kfree(trace);
 	g->ctxsw_trace = NULL;
 	return err;
@@ -493,6 +532,9 @@ fail:
 void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
 {
 #ifdef CONFIG_GK20A_CTXSW_TRACE
+	if (!g->ctxsw_trace)
+		return;
+
 	kfree(g->ctxsw_trace);
 	g->ctxsw_trace = NULL;
 
@@ -584,3 +626,11 @@ void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
 
 	wake_up_interruptible(&dev->readout_wq);
 }
+
+/* Install the native vmalloc-backed user-buffer ops in the fecs_trace HAL. */
+void gk20a_ctxsw_trace_init_ops(struct gpu_ops *ops)
+{
+	ops->fecs_trace.alloc_user_buffer = gk20a_ctxsw_dev_ring_alloc;
+	ops->fecs_trace.free_user_buffer = gk20a_ctxsw_dev_ring_free;
+	ops->fecs_trace.mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
index c57d95d1..7a2f4aeb 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
@@ -17,6 +17,7 @@
 #define GK20A_CTXSW_TRACE_NUM_DEVS			1
 
 struct gk20a;
+struct gpu_ops;
 struct nvgpu_ctxsw_trace_entry;
 struct channel_gk20a;
 struct channel_ctx_gk20a;
@@ -37,5 +38,6 @@ int gk20a_ctxsw_trace_setup(struct gk20a *, void *ctx_ptr);
 void gk20a_ctxsw_trace_cleanup(struct gk20a *);
 int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *);
 void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid);
+void gk20a_ctxsw_trace_init_ops(struct gpu_ops *ops);
 
 #endif /* __CTXSW_TRACE_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index aef0c9b3..58d8fda1 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -745,6 +745,7 @@ static int gk20a_fecs_trace_disable(struct gk20a *g)
 
 void gk20a_init_fecs_trace_ops(struct gpu_ops *ops)
 {
+	gk20a_ctxsw_trace_init_ops(ops);
 	ops->fecs_trace.init = gk20a_fecs_trace_init;
 	ops->fecs_trace.deinit = gk20a_fecs_trace_deinit;
 	ops->fecs_trace.enable = gk20a_fecs_trace_enable;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d9cc3d4f..fb149f56 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -391,6 +391,13 @@ struct gpu_ops {
 		int (*bind_channel)(struct gk20a *, struct channel_gk20a *);
 		int (*unbind_channel)(struct gk20a *, struct channel_gk20a *);
 		int (*deinit)(struct gk20a *g);
+		int (*alloc_user_buffer)(struct gk20a *g,
+					void **buf, size_t *size);
+		int (*free_user_buffer)(struct gk20a *g);
+		int (*mmap_user_buffer)(struct gk20a *g,
+					struct vm_area_struct *vma);
+		int (*set_filter)(struct gk20a *g,
+			struct nvgpu_ctxsw_trace_filter *filter);
 	} fecs_trace;
 	struct {
 		bool (*support_sparse)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c
index cb955811..568f3784 100644
--- a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c
@@ -12,10 +12,228 @@
  */
 
 #include <linux/string.h>
+#include <linux/tegra-ivc.h>
+#include <linux/tegra_vgpu.h>
+
 #include "gk20a/gk20a.h"
+#include "gk20a/ctxsw_trace_gk20a.h"
+#include "vgpu.h"
 #include "fecs_trace_vgpu.h"
 
+struct vgpu_fecs_trace {
+	struct tegra_hv_ivm_cookie *cookie;
+	struct nvgpu_ctxsw_ring_header *header;
+	struct nvgpu_ctxsw_trace_entry *entries;
+	int num_entries;
+	void *buf;
+};
+
+/*
+ * Map the hypervisor mempool named by the "mempool-fecs-trace" DT property
+ * and publish it as g->fecs_trace. Returns 0 or a negative errno; on failure
+ * nothing stays reserved, mapped or allocated.
+ */
+static int vgpu_fecs_trace_init(struct gk20a *g)
+{
+	struct device *dev = g->dev;
+	struct device_node *np = dev->of_node;
+	struct of_phandle_args args;
+	struct device_node *hv_np;
+	struct vgpu_fecs_trace *vcst;
+	u32 mempool;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	vcst = kzalloc(sizeof(*vcst), GFP_KERNEL);
+	if (!vcst)
+		return -ENOMEM;
+
+	err = of_parse_phandle_with_fixed_args(np,
+			"mempool-fecs-trace", 1, 0, &args);
+	if (err) {
+		dev_info(dev_from_gk20a(g), "does not support fecs trace\n");
+		goto fail_free;
+	}
+
+	hv_np = args.np;
+	mempool = args.args[0];
+	vcst->cookie = tegra_hv_mempool_reserve(hv_np, mempool);
+	if (IS_ERR(vcst->cookie)) {
+		dev_info(dev_from_gk20a(g),
+			"mempool %u reserve failed\n", mempool);
+		err = -EINVAL;
+		goto fail_free;
+	}
+
+	vcst->buf = ioremap_cache(vcst->cookie->ipa, vcst->cookie->size);
+	if (!vcst->buf) {
+		dev_err(dev_from_gk20a(g), "ioremap_cache failed\n");
+		err = -ENOMEM;
+		goto fail_unreserve;
+	}
+
+	vcst->header = vcst->buf;
+	vcst->num_entries = vcst->header->num_ents;
+	if (unlikely(vcst->header->ent_size != sizeof(*vcst->entries))) {
+		dev_err(dev_from_gk20a(g),
+			"entry size mismatch\n");
+		/* err is still 0 here; set it or init reports success */
+		err = -EINVAL;
+		goto fail_unmap;
+	}
+	vcst->entries = vcst->buf + sizeof(*vcst->header);
+	g->fecs_trace = (struct gk20a_fecs_trace *)vcst;
+
+	return 0;
+
+fail_unmap:
+	iounmap(vcst->buf);
+fail_unreserve:
+	tegra_hv_mempool_unreserve(vcst->cookie);
+fail_free:
+	kfree(vcst);
+	return err;
+}
+
+/* Undo vgpu_fecs_trace_init(): unmap, unreserve the mempool, free state. */
+static int vgpu_fecs_trace_deinit(struct gk20a *g)
+{
+	struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
+
+	iounmap(vcst->buf);
+	tegra_hv_mempool_unreserve(vcst->cookie);
+	kfree(vcst);
+	return 0;
+}
+
+/* Send FECS_TRACE_ENABLE to the vgpu server; transport error or msg.ret. */
+static int vgpu_fecs_trace_enable(struct gk20a *g)
+{
+	struct tegra_vgpu_cmd_msg msg = {
+		.cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE,
+		.handle = gk20a_get_platform(g->dev)->virt_handle,
+	};
+	int err;
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	err = err ? err : msg.ret;
+	WARN_ON(err);
+	return err;
+}
+
+/* Send FECS_TRACE_DISABLE to the vgpu server; transport error or msg.ret. */
+static int vgpu_fecs_trace_disable(struct gk20a *g)
+{
+	struct tegra_vgpu_cmd_msg msg = {
+		.cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE,
+		.handle = gk20a_get_platform(g->dev)->virt_handle,
+	};
+	int err;
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	err = err ? err : msg.ret;
+	WARN_ON(err);
+	return err;
+}
+
+/* Send FECS_TRACE_POLL to the vgpu server; transport error or msg.ret. */
+static int vgpu_fecs_trace_poll(struct gk20a *g)
+{
+	struct tegra_vgpu_cmd_msg msg = {
+		.cmd = TEGRA_VGPU_CMD_FECS_TRACE_POLL,
+		.handle = gk20a_get_platform(g->dev)->virt_handle,
+	};
+	int err;
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	err = err ? err : msg.ret;
+	WARN_ON(err);
+	return err;
+}
+
+/* vgpu HAL: no allocation, hand out the mapped mempool and its size. */
+static int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size)
+{
+	struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
+
+	*buf = vcst->buf;
+	*size = vcst->cookie->size;
+	return 0;
+}
+
+/* vgpu HAL: nothing to free here, the mapping is torn down in deinit. */
+static int vgpu_free_user_buffer(struct gk20a *g)
+{
+	return 0;
+}
+
+/* vgpu HAL: map the mempool pages (at most the mempool size) into @vma. */
+static int vgpu_mmap_user_buffer(struct gk20a *g, struct vm_area_struct *vma)
+{
+	struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
+	unsigned long size = vcst->cookie->size;
+	unsigned long vsize = vma->vm_end - vma->vm_start;
+
+	size = min(size, vsize);
+	size = round_up(size, PAGE_SIZE);
+
+	return remap_pfn_range(vma, vma->vm_start,
+			vcst->cookie->ipa >> PAGE_SHIFT,
+			size,
+			vma->vm_page_prot);
+}
+
+static int vgpu_fecs_trace_max_entries(struct gk20a *g,
+			struct nvgpu_ctxsw_trace_filter *filter)
+{
+	struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
+
+	return vcst->header->num_ents;
+}
+
+#if NVGPU_CTXSW_FILTER_SIZE != TEGRA_VGPU_FECS_TRACE_FILTER_SIZE
+#error "FECS trace filter size mismatch!"
+#endif
+
+/* Forward the tag_bits of @filter to the vgpu server. */
+static int vgpu_fecs_trace_set_filter(struct gk20a *g,
+			struct nvgpu_ctxsw_trace_filter *filter)
+{
+	struct tegra_vgpu_cmd_msg msg = {
+		.cmd = TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER,
+		.handle = gk20a_get_platform(g->dev)->virt_handle,
+	};
+	struct tegra_vgpu_fecs_trace_filter *p = &msg.params.fecs_trace_filter;
+	int err;
+
+	memcpy(&p->tag_bits, &filter->tag_bits, sizeof(p->tag_bits));
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	err = err ? err : msg.ret;
+	WARN_ON(err);
+	return err;
+}
+
 void vgpu_init_fecs_trace_ops(struct gpu_ops *ops)
 {
-	memset(&ops->fecs_trace, 0, sizeof(ops->fecs_trace));
+	ops->fecs_trace.init = vgpu_fecs_trace_init;
+	ops->fecs_trace.deinit = vgpu_fecs_trace_deinit;
+	ops->fecs_trace.enable = vgpu_fecs_trace_enable;
+	ops->fecs_trace.disable = vgpu_fecs_trace_disable;
+	ops->fecs_trace.reset = NULL;
+	ops->fecs_trace.flush = NULL;
+	ops->fecs_trace.poll = vgpu_fecs_trace_poll;
+	ops->fecs_trace.bind_channel = NULL;
+	ops->fecs_trace.unbind_channel = NULL;
+	ops->fecs_trace.max_entries = vgpu_fecs_trace_max_entries;
+	ops->fecs_trace.alloc_user_buffer = vgpu_alloc_user_buffer;
+	ops->fecs_trace.free_user_buffer = vgpu_free_user_buffer;
+	ops->fecs_trace.mmap_user_buffer = vgpu_mmap_user_buffer;
+	ops->fecs_trace.set_filter = vgpu_fecs_trace_set_filter;
+}
+
+/* Called on a FECS_TRACE event from the server: wake readers for vmid 0. */
+void vgpu_fecs_trace_data_update(struct gk20a *g)
+{
+	gk20a_ctxsw_trace_wake_up(g, 0);
 }
diff --git a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h
index 1aace1fe..93c44b71 100644
--- a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h
+++ b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h
@@ -16,5 +16,6 @@
 struct gpu_ops;
 
 void vgpu_init_fecs_trace_ops(struct gpu_ops *ops);
+void vgpu_fecs_trace_data_update(struct gk20a *g);
 
 #endif /* __FECS_TRACE_VGPU_H */
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index b67f4d95..90197ab4 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -22,6 +22,7 @@
 #include "gk20a/debug_gk20a.h"
 #include "gk20a/hal_gk20a.h"
 #include "gk20a/hw_mc_gk20a.h"
+#include "gk20a/ctxsw_trace_gk20a.h"
 #include "gm20b/hal_gm20b.h"
 
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
@@ -120,6 +121,12 @@ static int vgpu_intr_thread(void *dev_id)
 			break;
 		}
 
+		if (msg->event == TEGRA_VGPU_EVENT_FECS_TRACE) {
+			vgpu_fecs_trace_data_update(g);
+			tegra_gr_comm_release(handle);
+			continue;
+		}
+
 		if (msg->unit == TEGRA_VGPU_INTR_GR)
 			vgpu_gr_isr(g, &msg->info.gr_intr);
 		else if (msg->unit == TEGRA_VGPU_NONSTALL_INTR_GR)
@@ -334,6 +341,7 @@ int vgpu_pm_finalize_poweron(struct device *dev)
 
 	g->gpu_characteristics.flags &= ~NVGPU_GPU_FLAGS_SUPPORT_TSG;
 
+	gk20a_ctxsw_trace_init(g);
 	gk20a_channel_resume(g);
 
 done:
-- 
cgit v1.2.2