From b5b4353ca6cc9b6457ddccc00bf87538291870fc Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 20 Mar 2018 04:51:23 -0700
Subject: gpu: nvgpu: set safe state for user managed syncpoints

The MAX/threshold value of a user managed syncpoint is not tracked by nvgpu.
So if a channel is reset by nvgpu, there could be waiters still waiting on
some user syncpoint fence.

Fix this by setting the user managed syncpoint to a large safe value when
aborting the channel and when closing the channel.

Right now we increment the current value by 0x10000, which should be
sufficient to release any pending waiter.

Bug 200326065
Jira NVGPU-179

Change-Id: Ie6432369bb4c21bd922c14b8d5a74c1477116f0b
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1678768
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 78953558..65b17304 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -196,6 +196,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 	nvgpu_mutex_acquire(&ch->sync_lock);
 	if (ch->sync)
 		ch->sync->set_min_eq_max(ch->sync);
+	if (ch->user_sync)
+		ch->user_sync->set_safe_state(ch->user_sync);
 	nvgpu_mutex_release(&ch->sync_lock);
 
 	/* release all job semaphores (applies only to jobs that use
@@ -435,11 +437,18 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	/* sync must be destroyed before releasing channel vm */
 	nvgpu_mutex_acquire(&ch->sync_lock);
 	if (ch->sync) {
-		gk20a_channel_sync_destroy(ch->sync);
+		gk20a_channel_sync_destroy(ch->sync, false);
 		ch->sync = NULL;
 	}
 	if (ch->user_sync) {
-		gk20a_channel_sync_destroy(ch->user_sync);
+		/*
+		 * Set user managed syncpoint to safe state.
+		 * This is already done if the channel has timed out.
+		 */
+		if (ch->has_timedout)
+			gk20a_channel_sync_destroy(ch->user_sync, false);
+		else
+			gk20a_channel_sync_destroy(ch->user_sync, true);
 		ch->user_sync = NULL;
 	}
 	nvgpu_mutex_release(&ch->sync_lock);
@@ -1211,7 +1220,7 @@ clean_up_prealloc:
 		channel_gk20a_free_prealloc_resources(c);
 clean_up_sync:
 	if (c->sync) {
-		gk20a_channel_sync_destroy(c->sync);
+		gk20a_channel_sync_destroy(c->sync, false);
 		c->sync = NULL;
 	}
 clean_up_unmap:
@@ -1905,7 +1914,8 @@ void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 				if (nvgpu_atomic_dec_and_test(
 					&c->sync->refcount) &&
 						g->aggressive_sync_destroy) {
-					gk20a_channel_sync_destroy(c->sync);
+					gk20a_channel_sync_destroy(c->sync,
+						false);
 					c->sync = NULL;
 				}
 				nvgpu_mutex_release(&c->sync_lock);
-- 
cgit v1.2.2