From b5b4353ca6cc9b6457ddccc00bf87538291870fc Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Tue, 20 Mar 2018 04:51:23 -0700 Subject: gpu: nvgpu: set safe state for user managed syncpoints The MAX/threshold value of a user managed syncpoint is not tracked by nvgpu, so if the channel is reset by nvgpu there could be waiters still waiting on some user syncpoint fence. Fix this by setting a large safe value on the user managed syncpoint when aborting the channel and when closing the channel. We currently increment the current value by 0x10000, which should be sufficient to release any pending waiter. Bug 200326065 Jira NVGPU-179 Change-Id: Ie6432369bb4c21bd922c14b8d5a74c1477116f0b Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1678768 Reviewed-by: svc-mobile-coverity GVS: Gerrit_Virtual_Submit Reviewed-by: Konsta Holtta Reviewed-by: Vijayakumar Subbu Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c') diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 78953558..65b17304 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -196,6 +196,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) nvgpu_mutex_acquire(&ch->sync_lock); if (ch->sync) ch->sync->set_min_eq_max(ch->sync); + if (ch->user_sync) + ch->user_sync->set_safe_state(ch->user_sync); nvgpu_mutex_release(&ch->sync_lock); /* release all job semaphores (applies only to jobs that use @@ -435,11 +437,18 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) /* sync must be destroyed before releasing channel vm */ nvgpu_mutex_acquire(&ch->sync_lock); if (ch->sync) { - gk20a_channel_sync_destroy(ch->sync); + gk20a_channel_sync_destroy(ch->sync, false); ch->sync = NULL; } if (ch->user_sync) { - 
gk20a_channel_sync_destroy(ch->user_sync); + /* + * Set user managed syncpoint to safe state + * But it's already done if channel has timedout + */ + if (ch->has_timedout) + gk20a_channel_sync_destroy(ch->user_sync, false); + else + gk20a_channel_sync_destroy(ch->user_sync, true); ch->user_sync = NULL; } nvgpu_mutex_release(&ch->sync_lock); @@ -1211,7 +1220,7 @@ clean_up_prealloc: channel_gk20a_free_prealloc_resources(c); clean_up_sync: if (c->sync) { - gk20a_channel_sync_destroy(c->sync); + gk20a_channel_sync_destroy(c->sync, false); c->sync = NULL; } clean_up_unmap: @@ -1905,7 +1914,8 @@ void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, if (nvgpu_atomic_dec_and_test( &c->sync->refcount) && g->aggressive_sync_destroy) { - gk20a_channel_sync_destroy(c->sync); + gk20a_channel_sync_destroy(c->sync, + false); c->sync = NULL; } nvgpu_mutex_release(&c->sync_lock); -- cgit v1.2.2