From 989f7f70c3b7a77467cb216932490e48dc03c64c Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Thu, 28 Apr 2016 08:50:37 -0700 Subject: gpu: nvgpu: fix deadlock on FECS trace disable fecs_trace.disable() method kills polling thread and waits until it completes. dev->lock must not be held while calling this function, as polling thread may attempt to acquire it, leading to a deadlock. Also fixed potential deadlock in ioctl to disable trace. Bug 1758405 Change-Id: I8f6baaba8093ce92961413f6152ee8b81beca3e4 Signed-off-by: Thomas Fleury Reviewed-on: http://git-master/r/1139296 (cherry picked from commit 3391a911e1fa9170a5aa989c81bcba6a2f79a9d4) Reviewed-on: http://git-master/r/1150047 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Richard Zhao GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 62 +++++++++++++++++------------ 1 file changed, 36 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c') diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c index 0fa9e65a..19ba6dde 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c @@ -46,7 +46,7 @@ struct gk20a_ctxsw_dev { atomic_t vma_ref; - struct mutex lock; + struct mutex write_lock; }; @@ -87,16 +87,16 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "filp=%p buf=%p size=%zu", filp, buf, size); - mutex_lock(&dev->lock); + mutex_lock(&dev->write_lock); while (ring_is_empty(hdr)) { - mutex_unlock(&dev->lock); + mutex_unlock(&dev->write_lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; err = wait_event_interruptible(dev->readout_wq, !ring_is_empty(hdr)); if (err) return err; - mutex_lock(&dev->lock); + mutex_lock(&dev->write_lock); } while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) { @@ -105,7 +105,7 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, if (copy_to_user(entry, &dev->ents[hdr->read_idx], sizeof(*entry))) { - mutex_unlock(&dev->lock); + mutex_unlock(&dev->write_lock); return -EFAULT; } @@ -122,7 +122,7 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, hdr->read_idx); *off = hdr->read_idx; - mutex_unlock(&dev->lock); + mutex_unlock(&dev->write_lock); return copied; } @@ -130,7 +130,9 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) { gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); + mutex_lock(&dev->write_lock); dev->write_enabled = true; + mutex_unlock(&dev->write_lock); dev->g->ops.fecs_trace.enable(dev->g); return 0; } @@ -139,7 +141,9 @@ static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) { gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); dev->g->ops.fecs_trace.disable(dev->g); + mutex_lock(&dev->write_lock); dev->write_enabled = false; + mutex_unlock(&dev->write_lock); return 0; } @@ -203,13 +207,18 @@ static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, struct nvgpu_ctxsw_ring_setup_args *args) { size_t size = args->size; + int ret; gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size); if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) return -EINVAL; - return gk20a_ctxsw_dev_alloc_buffer(dev, size); + mutex_lock(&dev->write_lock); + ret = gk20a_ctxsw_dev_alloc_buffer(dev, size); + mutex_unlock(&dev->write_lock); + + return ret; } static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, @@ -217,7 +226,10 @@ static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, { struct gk20a *g = dev->g; + mutex_lock(&dev->write_lock); dev->filter = args->filter; + mutex_unlock(&dev->write_lock); + if (g->ops.fecs_trace.set_filter) g->ops.fecs_trace.set_filter(g, &dev->filter); return 0; @@ -226,7 +238,10 @@ static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, struct nvgpu_ctxsw_trace_filter_args *args) { + mutex_lock(&dev->write_lock); args->filter = dev->filter; + mutex_unlock(&dev->write_lock); + return 0; } @@ -281,7 +296,7 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) /* Allow only one user for this device */ dev = &trace->devs[vmid]; - mutex_lock(&dev->lock); + mutex_lock(&dev->write_lock); if (dev->hdr) { err = -EBUSY; goto done; @@ -309,7 +324,7 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) } done: - mutex_unlock(&dev->lock); + mutex_unlock(&dev->write_lock); idle: gk20a_idle(g->dev); @@ -320,20 +335,21 @@ idle: int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) { struct gk20a_ctxsw_dev *dev = filp->private_data; + struct gk20a *g = dev->g; gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); - mutex_lock(&dev->lock); - if (dev->write_enabled) - gk20a_ctxsw_dev_ioctl_trace_disable(dev); + g->ops.fecs_trace.disable(g); + + mutex_lock(&dev->write_lock); + dev->write_enabled = false; + mutex_unlock(&dev->write_lock); if (dev->hdr) { dev->g->ops.fecs_trace.free_user_buffer(dev->g); dev->hdr = NULL; } - mutex_unlock(&dev->lock); - return 0; } @@ -359,8 +375,6 @@ long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, return -EFAULT; } - mutex_lock(&dev->lock); - switch (cmd) { case NVGPU_CTXSW_IOCTL_TRACE_ENABLE: err = gk20a_ctxsw_dev_ioctl_trace_enable(dev); @@ -381,9 +395,7 @@ long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, (struct nvgpu_ctxsw_trace_filter_args *) buf); break; case NVGPU_CTXSW_IOCTL_POLL: - mutex_unlock(&dev->lock); err = gk20a_ctxsw_dev_ioctl_poll(dev); - mutex_lock(&dev->lock); break; default: dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", @@ -391,8 +403,6 @@ long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, err = -ENOTTY; } - mutex_unlock(&dev->lock); - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); @@ -407,11 +417,11 @@ unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait) gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); - mutex_lock(&dev->lock); + mutex_lock(&dev->write_lock); poll_wait(filp, &dev->readout_wq, wait); if (!ring_is_empty(hdr)) mask |= POLLIN | POLLRDNORM; - mutex_unlock(&dev->lock); + mutex_unlock(&dev->write_lock); return mask; } @@ -475,7 +485,7 @@ static int gk20a_ctxsw_init_devs(struct gk20a *g) dev->hdr = NULL; dev->write_enabled = false; init_waitqueue_head(&dev->readout_wq); - mutex_init(&dev->lock); + mutex_init(&dev->write_lock); atomic_set(&dev->vma_ref, 0); dev++; } @@ -553,7 +563,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g, gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, "dev=%p hdr=%p", dev, hdr); - mutex_lock(&dev->lock); + mutex_lock(&dev->write_lock); if (unlikely(!hdr)) { /* device has been released */ @@ -596,7 +606,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g, gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d", hdr->read_idx, hdr->write_idx, ring_len(hdr)); - mutex_unlock(&dev->lock); + mutex_unlock(&dev->write_lock); return ret; drop: @@ -610,7 +620,7 @@ filter: entry->tag, entry->timestamp, reason); done: - mutex_unlock(&dev->lock); + mutex_unlock(&dev->write_lock); return ret; } -- cgit v1.2.2