summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2016-01-21 17:50:23 -0500
committerAlex Waterman <alexw@nvidia.com>2016-01-27 13:59:00 -0500
commitf7d219dd1c95ba9de2349b4de9f8cb510ec001cb (patch)
treeae250744ba1c042b9ac82630af89d4e1b8a16e82 /drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
parentaa74098f29b3027111baf17c21d6e30a3656e2d0 (diff)
gpu: nvgpu: Fix semaphore race condition
A race condition existed in gk20a_channel_semaphore_wait_fd(). In some instances the semaphore underlying the sync_fence being waited on would have already signaled. This would cause the subsequent sync_fence_wait_async() call to return 1 and do nothing. Normally, the sync_fence_wait_async() call would release the newly created semaphore but in the above case that would not happen and hang any channel waiting on that semaphore. To fix this problem if sync_fence_wait_async() returns 1 immediately release the newly created semaphore. Bug 1604892 Change-Id: I1f5e811695bb099f71b7762835aba4a7e27362ec Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: http://git-master/r/935910 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c14
1 files changed, 12 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 952e6e6a..bba18789 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -456,7 +456,7 @@ static int gk20a_channel_semaphore_wait_fd(
456 struct priv_cmd_entry *wait_cmd = NULL; 456 struct priv_cmd_entry *wait_cmd = NULL;
457 struct wait_fence_work *w; 457 struct wait_fence_work *w;
458 int written; 458 int written;
459 int err; 459 int err, ret;
460 u64 va; 460 u64 va;
461 461
462 sync_fence = gk20a_sync_fence_fdget(fd); 462 sync_fence = gk20a_sync_fence_fdget(fd);
@@ -490,8 +490,18 @@ static int gk20a_channel_semaphore_wait_fd(
490 va = gk20a_semaphore_gpu_va(w->sema, c->vm); 490 va = gk20a_semaphore_gpu_va(w->sema, c->vm);
491 /* GPU unblocked when when the semaphore value becomes 1. */ 491 /* GPU unblocked when when the semaphore value becomes 1. */
492 written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false); 492 written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false);
493
493 WARN_ON(written != wait_cmd->size); 494 WARN_ON(written != wait_cmd->size);
494 sync_fence_wait_async(sync_fence, &w->waiter); 495 ret = sync_fence_wait_async(sync_fence, &w->waiter);
496
497 /*
498 * If the sync_fence has already signaled then the above async_wait
499 * will never trigger. This causes the semaphore release op to never
500 * happen which, in turn, hangs the GPU. That's bad. So let's just
501 * do the semaphore_release right now.
502 */
503 if (ret == 1)
504 gk20a_semaphore_release(w->sema);
495 505
496 /* XXX - this fixes an actual bug, we need to hold a ref to this 506 /* XXX - this fixes an actual bug, we need to hold a ref to this
497 semaphore while the job is in flight. */ 507 semaphore while the job is in flight. */