diff options
author | Alex Waterman <alexw@nvidia.com> | 2016-01-21 17:50:23 -0500 |
---|---|---|
committer | Alex Waterman <alexw@nvidia.com> | 2016-01-27 13:59:00 -0500 |
commit | f7d219dd1c95ba9de2349b4de9f8cb510ec001cb (patch) | |
tree | ae250744ba1c042b9ac82630af89d4e1b8a16e82 /drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |
parent | aa74098f29b3027111baf17c21d6e30a3656e2d0 (diff) |
gpu: nvgpu: Fix semaphore race condition
A race condition existed in gk20a_channel_semaphore_wait_fd().
In some instances the semaphore underlying the sync_fence being
waited on would have already signaled. This would cause the
subsequent sync_fence_wait_async() call to return 1 and do
nothing. Normally, the sync_fence_wait_async() call would
release the newly created semaphore, but in the above case that
would not happen, hanging any channel waiting on that semaphore.
To fix this problem, if sync_fence_wait_async() returns 1,
immediately release the newly created semaphore.
Bug 1604892
Change-Id: I1f5e811695bb099f71b7762835aba4a7e27362ec
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/935910
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 14 |
1 files changed, 12 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 952e6e6a..bba18789 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -456,7 +456,7 @@ static int gk20a_channel_semaphore_wait_fd( | |||
456 | struct priv_cmd_entry *wait_cmd = NULL; | 456 | struct priv_cmd_entry *wait_cmd = NULL; |
457 | struct wait_fence_work *w; | 457 | struct wait_fence_work *w; |
458 | int written; | 458 | int written; |
459 | int err; | 459 | int err, ret; |
460 | u64 va; | 460 | u64 va; |
461 | 461 | ||
462 | sync_fence = gk20a_sync_fence_fdget(fd); | 462 | sync_fence = gk20a_sync_fence_fdget(fd); |
@@ -490,8 +490,18 @@ static int gk20a_channel_semaphore_wait_fd( | |||
490 | va = gk20a_semaphore_gpu_va(w->sema, c->vm); | 490 | va = gk20a_semaphore_gpu_va(w->sema, c->vm); |
491 | /* GPU unblocked when when the semaphore value becomes 1. */ | 491 | /* GPU unblocked when when the semaphore value becomes 1. */ |
492 | written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false); | 492 | written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false); |
493 | |||
493 | WARN_ON(written != wait_cmd->size); | 494 | WARN_ON(written != wait_cmd->size); |
494 | sync_fence_wait_async(sync_fence, &w->waiter); | 495 | ret = sync_fence_wait_async(sync_fence, &w->waiter); |
496 | |||
497 | /* | ||
498 | * If the sync_fence has already signaled then the above async_wait | ||
499 | * will never trigger. This causes the semaphore release op to never | ||
500 | * happen which, in turn, hangs the GPU. That's bad. So let's just | ||
501 | * do the semaphore_release right now. | ||
502 | */ | ||
503 | if (ret == 1) | ||
504 | gk20a_semaphore_release(w->sema); | ||
495 | 505 | ||
496 | /* XXX - this fixes an actual bug, we need to hold a ref to this | 506 | /* XXX - this fixes an actual bug, we need to hold a ref to this |
497 | semaphore while the job is in flight. */ | 507 | semaphore while the job is in flight. */ |