diff options
author | Thomas Fleury <tfleury@nvidia.com> | 2016-04-28 11:50:37 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-05-21 14:33:36 -0400 |
commit | 989f7f70c3b7a77467cb216932490e48dc03c64c (patch) | |
tree | 3a3700c52afbc068e67dc5278bb4d4feaaeef7f8 /drivers/gpu/nvgpu/gk20a | |
parent | 47e3d2e90511b1cba68e46233896a918b32b5d33 (diff) |
gpu: nvgpu: fix deadlock on FECS trace disable
The fecs_trace.disable() method kills the polling
thread and waits until it completes. dev->lock must
not be held while calling this function, as the
polling thread may attempt to acquire it, leading
to a deadlock. Also fixed a potential deadlock in
the ioctl to disable trace.
Bug 1758405
Change-Id: I8f6baaba8093ce92961413f6152ee8b81beca3e4
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1139296
(cherry picked from commit 3391a911e1fa9170a5aa989c81bcba6a2f79a9d4)
Reviewed-on: http://git-master/r/1150047
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 62 |
1 file changed, 36 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c index 0fa9e65a..19ba6dde 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | |||
@@ -46,7 +46,7 @@ struct gk20a_ctxsw_dev { | |||
46 | 46 | ||
47 | atomic_t vma_ref; | 47 | atomic_t vma_ref; |
48 | 48 | ||
49 | struct mutex lock; | 49 | struct mutex write_lock; |
50 | }; | 50 | }; |
51 | 51 | ||
52 | 52 | ||
@@ -87,16 +87,16 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, | |||
87 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, | 87 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, |
88 | "filp=%p buf=%p size=%zu", filp, buf, size); | 88 | "filp=%p buf=%p size=%zu", filp, buf, size); |
89 | 89 | ||
90 | mutex_lock(&dev->lock); | 90 | mutex_lock(&dev->write_lock); |
91 | while (ring_is_empty(hdr)) { | 91 | while (ring_is_empty(hdr)) { |
92 | mutex_unlock(&dev->lock); | 92 | mutex_unlock(&dev->write_lock); |
93 | if (filp->f_flags & O_NONBLOCK) | 93 | if (filp->f_flags & O_NONBLOCK) |
94 | return -EAGAIN; | 94 | return -EAGAIN; |
95 | err = wait_event_interruptible(dev->readout_wq, | 95 | err = wait_event_interruptible(dev->readout_wq, |
96 | !ring_is_empty(hdr)); | 96 | !ring_is_empty(hdr)); |
97 | if (err) | 97 | if (err) |
98 | return err; | 98 | return err; |
99 | mutex_lock(&dev->lock); | 99 | mutex_lock(&dev->write_lock); |
100 | } | 100 | } |
101 | 101 | ||
102 | while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) { | 102 | while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) { |
@@ -105,7 +105,7 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, | |||
105 | 105 | ||
106 | if (copy_to_user(entry, &dev->ents[hdr->read_idx], | 106 | if (copy_to_user(entry, &dev->ents[hdr->read_idx], |
107 | sizeof(*entry))) { | 107 | sizeof(*entry))) { |
108 | mutex_unlock(&dev->lock); | 108 | mutex_unlock(&dev->write_lock); |
109 | return -EFAULT; | 109 | return -EFAULT; |
110 | } | 110 | } |
111 | 111 | ||
@@ -122,7 +122,7 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, | |||
122 | hdr->read_idx); | 122 | hdr->read_idx); |
123 | 123 | ||
124 | *off = hdr->read_idx; | 124 | *off = hdr->read_idx; |
125 | mutex_unlock(&dev->lock); | 125 | mutex_unlock(&dev->write_lock); |
126 | 126 | ||
127 | return copied; | 127 | return copied; |
128 | } | 128 | } |
@@ -130,7 +130,9 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, | |||
130 | static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) | 130 | static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) |
131 | { | 131 | { |
132 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); | 132 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); |
133 | mutex_lock(&dev->write_lock); | ||
133 | dev->write_enabled = true; | 134 | dev->write_enabled = true; |
135 | mutex_unlock(&dev->write_lock); | ||
134 | dev->g->ops.fecs_trace.enable(dev->g); | 136 | dev->g->ops.fecs_trace.enable(dev->g); |
135 | return 0; | 137 | return 0; |
136 | } | 138 | } |
@@ -139,7 +141,9 @@ static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) | |||
139 | { | 141 | { |
140 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); | 142 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); |
141 | dev->g->ops.fecs_trace.disable(dev->g); | 143 | dev->g->ops.fecs_trace.disable(dev->g); |
144 | mutex_lock(&dev->write_lock); | ||
142 | dev->write_enabled = false; | 145 | dev->write_enabled = false; |
146 | mutex_unlock(&dev->write_lock); | ||
143 | return 0; | 147 | return 0; |
144 | } | 148 | } |
145 | 149 | ||
@@ -203,13 +207,18 @@ static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, | |||
203 | struct nvgpu_ctxsw_ring_setup_args *args) | 207 | struct nvgpu_ctxsw_ring_setup_args *args) |
204 | { | 208 | { |
205 | size_t size = args->size; | 209 | size_t size = args->size; |
210 | int ret; | ||
206 | 211 | ||
207 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size); | 212 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size); |
208 | 213 | ||
209 | if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) | 214 | if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) |
210 | return -EINVAL; | 215 | return -EINVAL; |
211 | 216 | ||
212 | return gk20a_ctxsw_dev_alloc_buffer(dev, size); | 217 | mutex_lock(&dev->write_lock); |
218 | ret = gk20a_ctxsw_dev_alloc_buffer(dev, size); | ||
219 | mutex_unlock(&dev->write_lock); | ||
220 | |||
221 | return ret; | ||
213 | } | 222 | } |
214 | 223 | ||
215 | static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, | 224 | static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, |
@@ -217,7 +226,10 @@ static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, | |||
217 | { | 226 | { |
218 | struct gk20a *g = dev->g; | 227 | struct gk20a *g = dev->g; |
219 | 228 | ||
229 | mutex_lock(&dev->write_lock); | ||
220 | dev->filter = args->filter; | 230 | dev->filter = args->filter; |
231 | mutex_unlock(&dev->write_lock); | ||
232 | |||
221 | if (g->ops.fecs_trace.set_filter) | 233 | if (g->ops.fecs_trace.set_filter) |
222 | g->ops.fecs_trace.set_filter(g, &dev->filter); | 234 | g->ops.fecs_trace.set_filter(g, &dev->filter); |
223 | return 0; | 235 | return 0; |
@@ -226,7 +238,10 @@ static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, | |||
226 | static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, | 238 | static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, |
227 | struct nvgpu_ctxsw_trace_filter_args *args) | 239 | struct nvgpu_ctxsw_trace_filter_args *args) |
228 | { | 240 | { |
241 | mutex_lock(&dev->write_lock); | ||
229 | args->filter = dev->filter; | 242 | args->filter = dev->filter; |
243 | mutex_unlock(&dev->write_lock); | ||
244 | |||
230 | return 0; | 245 | return 0; |
231 | } | 246 | } |
232 | 247 | ||
@@ -281,7 +296,7 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) | |||
281 | 296 | ||
282 | /* Allow only one user for this device */ | 297 | /* Allow only one user for this device */ |
283 | dev = &trace->devs[vmid]; | 298 | dev = &trace->devs[vmid]; |
284 | mutex_lock(&dev->lock); | 299 | mutex_lock(&dev->write_lock); |
285 | if (dev->hdr) { | 300 | if (dev->hdr) { |
286 | err = -EBUSY; | 301 | err = -EBUSY; |
287 | goto done; | 302 | goto done; |
@@ -309,7 +324,7 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) | |||
309 | } | 324 | } |
310 | 325 | ||
311 | done: | 326 | done: |
312 | mutex_unlock(&dev->lock); | 327 | mutex_unlock(&dev->write_lock); |
313 | 328 | ||
314 | idle: | 329 | idle: |
315 | gk20a_idle(g->dev); | 330 | gk20a_idle(g->dev); |
@@ -320,20 +335,21 @@ idle: | |||
320 | int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) | 335 | int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) |
321 | { | 336 | { |
322 | struct gk20a_ctxsw_dev *dev = filp->private_data; | 337 | struct gk20a_ctxsw_dev *dev = filp->private_data; |
338 | struct gk20a *g = dev->g; | ||
323 | 339 | ||
324 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); | 340 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); |
325 | 341 | ||
326 | mutex_lock(&dev->lock); | 342 | g->ops.fecs_trace.disable(g); |
327 | if (dev->write_enabled) | 343 | |
328 | gk20a_ctxsw_dev_ioctl_trace_disable(dev); | 344 | mutex_lock(&dev->write_lock); |
345 | dev->write_enabled = false; | ||
346 | mutex_unlock(&dev->write_lock); | ||
329 | 347 | ||
330 | if (dev->hdr) { | 348 | if (dev->hdr) { |
331 | dev->g->ops.fecs_trace.free_user_buffer(dev->g); | 349 | dev->g->ops.fecs_trace.free_user_buffer(dev->g); |
332 | dev->hdr = NULL; | 350 | dev->hdr = NULL; |
333 | } | 351 | } |
334 | 352 | ||
335 | mutex_unlock(&dev->lock); | ||
336 | |||
337 | return 0; | 353 | return 0; |
338 | } | 354 | } |
339 | 355 | ||
@@ -359,8 +375,6 @@ long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, | |||
359 | return -EFAULT; | 375 | return -EFAULT; |
360 | } | 376 | } |
361 | 377 | ||
362 | mutex_lock(&dev->lock); | ||
363 | |||
364 | switch (cmd) { | 378 | switch (cmd) { |
365 | case NVGPU_CTXSW_IOCTL_TRACE_ENABLE: | 379 | case NVGPU_CTXSW_IOCTL_TRACE_ENABLE: |
366 | err = gk20a_ctxsw_dev_ioctl_trace_enable(dev); | 380 | err = gk20a_ctxsw_dev_ioctl_trace_enable(dev); |
@@ -381,9 +395,7 @@ long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, | |||
381 | (struct nvgpu_ctxsw_trace_filter_args *) buf); | 395 | (struct nvgpu_ctxsw_trace_filter_args *) buf); |
382 | break; | 396 | break; |
383 | case NVGPU_CTXSW_IOCTL_POLL: | 397 | case NVGPU_CTXSW_IOCTL_POLL: |
384 | mutex_unlock(&dev->lock); | ||
385 | err = gk20a_ctxsw_dev_ioctl_poll(dev); | 398 | err = gk20a_ctxsw_dev_ioctl_poll(dev); |
386 | mutex_lock(&dev->lock); | ||
387 | break; | 399 | break; |
388 | default: | 400 | default: |
389 | dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", | 401 | dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", |
@@ -391,8 +403,6 @@ long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, | |||
391 | err = -ENOTTY; | 403 | err = -ENOTTY; |
392 | } | 404 | } |
393 | 405 | ||
394 | mutex_unlock(&dev->lock); | ||
395 | |||
396 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | 406 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) |
397 | err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); | 407 | err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); |
398 | 408 | ||
@@ -407,11 +417,11 @@ unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait) | |||
407 | 417 | ||
408 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); | 418 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); |
409 | 419 | ||
410 | mutex_lock(&dev->lock); | 420 | mutex_lock(&dev->write_lock); |
411 | poll_wait(filp, &dev->readout_wq, wait); | 421 | poll_wait(filp, &dev->readout_wq, wait); |
412 | if (!ring_is_empty(hdr)) | 422 | if (!ring_is_empty(hdr)) |
413 | mask |= POLLIN | POLLRDNORM; | 423 | mask |= POLLIN | POLLRDNORM; |
414 | mutex_unlock(&dev->lock); | 424 | mutex_unlock(&dev->write_lock); |
415 | 425 | ||
416 | return mask; | 426 | return mask; |
417 | } | 427 | } |
@@ -475,7 +485,7 @@ static int gk20a_ctxsw_init_devs(struct gk20a *g) | |||
475 | dev->hdr = NULL; | 485 | dev->hdr = NULL; |
476 | dev->write_enabled = false; | 486 | dev->write_enabled = false; |
477 | init_waitqueue_head(&dev->readout_wq); | 487 | init_waitqueue_head(&dev->readout_wq); |
478 | mutex_init(&dev->lock); | 488 | mutex_init(&dev->write_lock); |
479 | atomic_set(&dev->vma_ref, 0); | 489 | atomic_set(&dev->vma_ref, 0); |
480 | dev++; | 490 | dev++; |
481 | } | 491 | } |
@@ -553,7 +563,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g, | |||
553 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, | 563 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, |
554 | "dev=%p hdr=%p", dev, hdr); | 564 | "dev=%p hdr=%p", dev, hdr); |
555 | 565 | ||
556 | mutex_lock(&dev->lock); | 566 | mutex_lock(&dev->write_lock); |
557 | 567 | ||
558 | if (unlikely(!hdr)) { | 568 | if (unlikely(!hdr)) { |
559 | /* device has been released */ | 569 | /* device has been released */ |
@@ -596,7 +606,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g, | |||
596 | gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d", | 606 | gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d", |
597 | hdr->read_idx, hdr->write_idx, ring_len(hdr)); | 607 | hdr->read_idx, hdr->write_idx, ring_len(hdr)); |
598 | 608 | ||
599 | mutex_unlock(&dev->lock); | 609 | mutex_unlock(&dev->write_lock); |
600 | return ret; | 610 | return ret; |
601 | 611 | ||
602 | drop: | 612 | drop: |
@@ -610,7 +620,7 @@ filter: | |||
610 | entry->tag, entry->timestamp, reason); | 620 | entry->tag, entry->timestamp, reason); |
611 | 621 | ||
612 | done: | 622 | done: |
613 | mutex_unlock(&dev->lock); | 623 | mutex_unlock(&dev->write_lock); |
614 | return ret; | 624 | return ret; |
615 | } | 625 | } |
616 | 626 | ||