From d20a501dcbf2e29d5d319670485f273afa3aefad Mon Sep 17 00:00:00 2001
From: Konsta Holtta
Date: Thu, 22 Mar 2018 15:47:43 +0200
Subject: gpu: nvgpu: simplify job semaphore release in abort

Instead of looping over all jobs and releasing their semaphores
separately, do just one semaphore release. All the jobs use the same
sema index, and its final, maximum value is known. Also move this
resetting into ch->sync->set_min_eq_max() to be consistent with
syncpoints.

Change-Id: I03601aae67db0a65750c8df6b43387c042d383bd
Signed-off-by: Konsta Holtta
Reviewed-on: https://git-master.nvidia.com/r/1680362
Reviewed-by: svc-mobile-coverity
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman
Reviewed-by: Terje Bergstrom
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c      | 44 ----------------------------
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 13 +++++++-
 drivers/gpu/nvgpu/include/nvgpu/semaphore.h  | 28 +++++++++++-------
 3 files changed, 30 insertions(+), 55 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 5befe086..8b144864 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -185,10 +185,6 @@ int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
 
 void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 {
-	struct channel_gk20a_job *job, *n;
-	bool released_job_semaphore = false;
-	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);
-
 	/* synchronize with actual job cleanup */
 	nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);
 
@@ -200,48 +196,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 		ch->user_sync->set_safe_state(ch->user_sync);
 	nvgpu_mutex_release(&ch->sync_lock);
 
-	/* release all job semaphores (applies only to jobs that use
-	   semaphore synchronization) */
-	channel_gk20a_joblist_lock(ch);
-	if (pre_alloc_enabled) {
-		int tmp_get = ch->joblist.pre_alloc.get;
-		int put = ch->joblist.pre_alloc.put;
-
-		/*
-		 * ensure put is read before any subsequent reads.
-		 * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job()
-		 */
-		nvgpu_smp_rmb();
-
-		while (tmp_get != put) {
-			job = &ch->joblist.pre_alloc.jobs[tmp_get];
-			if (job->post_fence->semaphore) {
-				nvgpu_semaphore_reset(
-					job->post_fence->semaphore,
-					ch->hw_sema);
-				released_job_semaphore = true;
-			}
-			tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
-		}
-	} else {
-		nvgpu_list_for_each_entry_safe(job, n,
-				&ch->joblist.dynamic.jobs,
-				channel_gk20a_job, list) {
-			if (job->post_fence->semaphore) {
-				nvgpu_semaphore_reset(
-					job->post_fence->semaphore,
-					ch->hw_sema);
-				released_job_semaphore = true;
-			}
-		}
-	}
-	channel_gk20a_joblist_unlock(ch);
-
 	nvgpu_mutex_release(&ch->joblist.cleanup_lock);
 
-	if (released_job_semaphore)
-		nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
-
 	/*
 	 * When closing the channel, this scheduled update holds one ref which
 	 * is waited for before advancing with freeing.
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index d87b8661..bf467210 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -646,7 +646,18 @@ static int gk20a_channel_semaphore_incr_user(
 static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
 {
-	/* Nothing to do. */
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sp->c;
+	bool updated;
+
+	if (!c->hw_sema)
+		return;
+
+	updated = nvgpu_semaphore_reset(c->hw_sema);
+
+	if (updated)
+		nvgpu_cond_broadcast_interruptible(&c->semaphore_wq);
 }
 
 static void gk20a_channel_semaphore_set_safe_state(struct gk20a_channel_sync *s)
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 373c5745..9ab6cc67 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -276,28 +276,36 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
 }
 
 /*
- * Fast-forward the hw sema to the threshold represented by sema_thresh.
+ * Fast-forward the hw sema to its tracked max value.
+ *
+ * Return true if the sema wasn't at the max value and needed updating, false
+ * otherwise.
  */
-static inline void nvgpu_semaphore_reset(struct nvgpu_semaphore *sema_thresh,
-		struct nvgpu_semaphore_int *hw_sema)
+static inline bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
 {
-	u32 current_val;
-	u32 threshold = nvgpu_semaphore_get_value(sema_thresh);
-
-	current_val = nvgpu_semaphore_read(sema_thresh);
+	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
+	u32 current_val = __nvgpu_semaphore_read(hw_sema);
 
 	/*
 	 * If the semaphore has already reached the value we would write then
-	 * this is really just a NO-OP.
+	 * this is really just a NO-OP. However, the sema value shouldn't be
+	 * more than what we expect to be the max.
 	 */
-	if (__nvgpu_semaphore_value_released(threshold, current_val))
-		return;
+
+	if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
+			current_val)))
+		return false;
+
+	if (current_val == threshold)
+		return false;
 
 	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
 			hw_sema->location.offset, threshold);
 
 	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
 			hw_sema->ch->chid, current_val, threshold);
+
+	return true;
 }
 
 /*
-- 
cgit v1.2.2
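
For illustration, here is a minimal userspace C sketch (not part of the patch) of the approach the commit message describes: instead of walking every pending job and resetting its post-fence semaphore, the abort path fast-forwards the channel's single hw sema to its tracked maximum once, and wakes waiters only if the value actually moved. The names used below (struct hw_sema, sema_read, sema_write, wake_waiters, sema_reset) are simplified stand-ins, not nvgpu APIs.

/*
 * Illustrative sketch only. struct hw_sema, sema_read(), sema_write(),
 * wake_waiters() and sema_reset() are simplified stand-ins for nvgpu's
 * nvgpu_semaphore_int, the nvgpu_mem accessors,
 * nvgpu_cond_broadcast_interruptible() and nvgpu_semaphore_reset().
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct hw_sema {
	atomic_uint next_value;	/* max value any submitted job will release */
	unsigned int mem_value;	/* stands in for the semaphore backing memory */
};

static unsigned int sema_read(const struct hw_sema *s)
{
	return s->mem_value;
}

static void sema_write(struct hw_sema *s, unsigned int val)
{
	s->mem_value = val;
}

static void wake_waiters(void)
{
	/* stands in for broadcasting on the channel's semaphore wait queue */
	printf("semaphore waiters woken\n");
}

/* Fast-forward the sema to its tracked max; return true if it changed. */
static bool sema_reset(struct hw_sema *s)
{
	unsigned int threshold = atomic_load(&s->next_value);
	unsigned int current = sema_read(s);

	if (current == threshold)
		return false;	/* already at the max: nothing to release */

	sema_write(s, threshold);
	return true;
}

int main(void)
{
	struct hw_sema s = { .mem_value = 3 };

	/* four job increments still pending: tracked max is 7 */
	atomic_store(&s.next_value, 7);

	/* abort path: one reset and one broadcast, no per-job loop */
	if (sema_reset(&s))
		wake_waiters();

	printf("sema value now %u (expected 7)\n", s.mem_value);
	return 0;
}

In the driver itself, the equivalent of this reset-then-broadcast pairing lives in gk20a_channel_semaphore_set_min_eq_max() above, guarded by the return value of nvgpu_semaphore_reset().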