path: root/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
author		Thomas Fleury <tfleury@nvidia.com>	2016-04-28 11:50:37 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-05-21 14:33:36 -0400
commit		989f7f70c3b7a77467cb216932490e48dc03c64c (patch)
tree		3a3700c52afbc068e67dc5278bb4d4feaaeef7f8 /drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
parent		47e3d2e90511b1cba68e46233896a918b32b5d33 (diff)
gpu: nvgpu: fix deadlock on FECS trace disable
The fecs_trace.disable() method kills the polling thread and waits until
it completes. dev->lock must not be held while calling this function, as
the polling thread may attempt to acquire it, leading to a deadlock. Also
fixed a potential deadlock in the ioctl that disables tracing.

Bug 1758405

Change-Id: I8f6baaba8093ce92961413f6152ee8b81beca3e4
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1139296
(cherry picked from commit 3391a911e1fa9170a5aa989c81bcba6a2f79a9d4)
Reviewed-on: http://git-master/r/1150047
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
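For readers outside the driver, the following is a minimal, hypothetical
userspace sketch of the deadlock, with pthreads standing in for the kernel
polling thread; poller(), poller_stop() and write_lock are illustrative
names, not nvgpu identifiers. The pre-patch ordering holds the lock across
the stop-and-join, so the join waits on a thread that is itself blocked on
the lock; the patch's ordering stops the poller first and only then takes
the lock to clear the enable flag.

/*
 * Hypothetical sketch of the bug, not driver code: pthreads stand in for
 * the kernel polling thread.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t write_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool stop_requested;

/* Polling thread: periodically takes the lock to publish trace entries,
 * as the FECS trace poller takes dev->lock in gk20a_ctxsw_trace_write(). */
static void *poller(void *unused)
{
	(void)unused;
	while (!atomic_load(&stop_requested)) {
		pthread_mutex_lock(&write_lock);  /* blocks if an ioctl holds it */
		/* ... copy entries into the ring buffer ... */
		pthread_mutex_unlock(&write_lock);
		usleep(1000);
	}
	return NULL;
}

/* Stand-in for fecs_trace.disable(): ask the poller to stop and wait. */
static void poller_stop(pthread_t t)
{
	atomic_store(&stop_requested, true);
	pthread_join(t, NULL);  /* returns only once the poller has exited */
}

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, poller, NULL);

	/*
	 * Pre-patch ordering (deadlocks): holding write_lock across the stop
	 * makes the join wait for a poller that is blocked on write_lock:
	 *
	 *	pthread_mutex_lock(&write_lock);
	 *	poller_stop(t);			// join never returns
	 *	pthread_mutex_unlock(&write_lock);
	 *
	 * Post-patch ordering: stop the poller first, then take the lock
	 * only to update shared state (the driver's write_enabled flag).
	 */
	poller_stop(t);
	pthread_mutex_lock(&write_lock);
	/* dev->write_enabled = false; */
	pthread_mutex_unlock(&write_lock);

	puts("trace disabled without deadlock");
	return 0;
}

Build with gcc -pthread; swapping in the pre-patch ordering shown in the
comment reproduces the hang.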
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c	62
1 file changed, 36 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index 0fa9e65a..19ba6dde 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -46,7 +46,7 @@ struct gk20a_ctxsw_dev {
 
 	atomic_t vma_ref;
 
-	struct mutex lock;
+	struct mutex write_lock;
 };
 
 
@@ -87,16 +87,16 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
 		"filp=%p buf=%p size=%zu", filp, buf, size);
 
-	mutex_lock(&dev->lock);
+	mutex_lock(&dev->write_lock);
 	while (ring_is_empty(hdr)) {
-		mutex_unlock(&dev->lock);
+		mutex_unlock(&dev->write_lock);
 		if (filp->f_flags & O_NONBLOCK)
 			return -EAGAIN;
 		err = wait_event_interruptible(dev->readout_wq,
 			!ring_is_empty(hdr));
 		if (err)
 			return err;
-		mutex_lock(&dev->lock);
+		mutex_lock(&dev->write_lock);
 	}
 
 	while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
@@ -105,7 +105,7 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
 
 		if (copy_to_user(entry, &dev->ents[hdr->read_idx],
 				sizeof(*entry))) {
-			mutex_unlock(&dev->lock);
+			mutex_unlock(&dev->write_lock);
 			return -EFAULT;
 		}
 
@@ -122,7 +122,7 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
 		hdr->read_idx);
 
 	*off = hdr->read_idx;
-	mutex_unlock(&dev->lock);
+	mutex_unlock(&dev->write_lock);
 
 	return copied;
 }
@@ -130,7 +130,9 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
 static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
 {
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
+	mutex_lock(&dev->write_lock);
 	dev->write_enabled = true;
+	mutex_unlock(&dev->write_lock);
 	dev->g->ops.fecs_trace.enable(dev->g);
 	return 0;
 }
@@ -139,7 +141,9 @@ static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
 {
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
 	dev->g->ops.fecs_trace.disable(dev->g);
+	mutex_lock(&dev->write_lock);
 	dev->write_enabled = false;
+	mutex_unlock(&dev->write_lock);
 	return 0;
 }
 
@@ -203,13 +207,18 @@ static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
 		struct nvgpu_ctxsw_ring_setup_args *args)
 {
 	size_t size = args->size;
+	int ret;
 
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);
 
 	if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
 		return -EINVAL;
 
-	return gk20a_ctxsw_dev_alloc_buffer(dev, size);
+	mutex_lock(&dev->write_lock);
+	ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
+	mutex_unlock(&dev->write_lock);
+
+	return ret;
 }
 
 static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
@@ -217,7 +226,10 @@ static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
 {
 	struct gk20a *g = dev->g;
 
+	mutex_lock(&dev->write_lock);
 	dev->filter = args->filter;
+	mutex_unlock(&dev->write_lock);
+
 	if (g->ops.fecs_trace.set_filter)
 		g->ops.fecs_trace.set_filter(g, &dev->filter);
 	return 0;
@@ -226,7 +238,10 @@ static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
 static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
 		struct nvgpu_ctxsw_trace_filter_args *args)
 {
+	mutex_lock(&dev->write_lock);
 	args->filter = dev->filter;
+	mutex_unlock(&dev->write_lock);
+
 	return 0;
 }
 
@@ -281,7 +296,7 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
 
 	/* Allow only one user for this device */
 	dev = &trace->devs[vmid];
-	mutex_lock(&dev->lock);
+	mutex_lock(&dev->write_lock);
 	if (dev->hdr) {
 		err = -EBUSY;
 		goto done;
@@ -309,7 +324,7 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
 	}
 
 done:
-	mutex_unlock(&dev->lock);
+	mutex_unlock(&dev->write_lock);
 
 idle:
 	gk20a_idle(g->dev);
@@ -320,20 +335,21 @@ idle:
 int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
 {
 	struct gk20a_ctxsw_dev *dev = filp->private_data;
+	struct gk20a *g = dev->g;
 
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
 
-	mutex_lock(&dev->lock);
-	if (dev->write_enabled)
-		gk20a_ctxsw_dev_ioctl_trace_disable(dev);
+	g->ops.fecs_trace.disable(g);
+
+	mutex_lock(&dev->write_lock);
+	dev->write_enabled = false;
+	mutex_unlock(&dev->write_lock);
 
 	if (dev->hdr) {
 		dev->g->ops.fecs_trace.free_user_buffer(dev->g);
 		dev->hdr = NULL;
 	}
 
-	mutex_unlock(&dev->lock);
-
 	return 0;
 }
 
@@ -359,8 +375,6 @@ long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
 		return -EFAULT;
 	}
 
-	mutex_lock(&dev->lock);
-
 	switch (cmd) {
 	case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
 		err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
@@ -381,9 +395,7 @@ long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
 			(struct nvgpu_ctxsw_trace_filter_args *) buf);
 		break;
 	case NVGPU_CTXSW_IOCTL_POLL:
-		mutex_unlock(&dev->lock);
 		err = gk20a_ctxsw_dev_ioctl_poll(dev);
-		mutex_lock(&dev->lock);
 		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
@@ -391,8 +403,6 @@ long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
 		err = -ENOTTY;
 	}
 
-	mutex_unlock(&dev->lock);
-
 	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
 		err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
 
@@ -407,11 +417,11 @@ unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
 
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
 
-	mutex_lock(&dev->lock);
+	mutex_lock(&dev->write_lock);
 	poll_wait(filp, &dev->readout_wq, wait);
 	if (!ring_is_empty(hdr))
 		mask |= POLLIN | POLLRDNORM;
-	mutex_unlock(&dev->lock);
+	mutex_unlock(&dev->write_lock);
 
 	return mask;
 }
@@ -475,7 +485,7 @@ static int gk20a_ctxsw_init_devs(struct gk20a *g)
 		dev->hdr = NULL;
 		dev->write_enabled = false;
 		init_waitqueue_head(&dev->readout_wq);
-		mutex_init(&dev->lock);
+		mutex_init(&dev->write_lock);
 		atomic_set(&dev->vma_ref, 0);
 		dev++;
 	}
@@ -553,7 +563,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g,
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
 		"dev=%p hdr=%p", dev, hdr);
 
-	mutex_lock(&dev->lock);
+	mutex_lock(&dev->write_lock);
 
 	if (unlikely(!hdr)) {
 		/* device has been released */
@@ -596,7 +606,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g,
 	gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
 		hdr->read_idx, hdr->write_idx, ring_len(hdr));
 
-	mutex_unlock(&dev->lock);
+	mutex_unlock(&dev->write_lock);
 	return ret;
 
 drop:
@@ -610,7 +620,7 @@ filter:
 		entry->tag, entry->timestamp, reason);
 
 done:
-	mutex_unlock(&dev->lock);
+	mutex_unlock(&dev->write_lock);
 	return ret;
 }
 