From 9b70ad24934988141c585d9fd85c59f5c9d58a38 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Tue, 27 Mar 2018 10:27:06 +0300
Subject: gpu: nvgpu: delete semaphore release support

Semaphores don't need to be released from the CPU anymore, so clarify
the code by deleting nvgpu_semaphore_release() and refactoring
__nvgpu_semaphore_release() into nvgpu_semaphore_reset(), which only
"fast-forwards" the semaphore to a later value.

While doing this, the meaning of nvgpu_semaphore_incr() changes, so
rename it to nvgpu_semaphore_prepare(). Now it's only used to prepare an
nvgpu_semaphore for a value that the HW will increment the sema to.

Also relax the BUG_ON guarding against sema double-inits to a WARN_ON.

Change-Id: I6f6df368ec5436cc97a229697742b6a4115dca51
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1680361
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c      |  8 ++--
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c |  2 +-
 drivers/gpu/nvgpu/include/nvgpu/semaphore.h  | 65 ++++++++--------------------
 3 files changed, 24 insertions(+), 51 deletions(-)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 65b17304..5befe086 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -216,8 +216,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 		while (tmp_get != put) {
 			job = &ch->joblist.pre_alloc.jobs[tmp_get];
 			if (job->post_fence->semaphore) {
-				__nvgpu_semaphore_release(
-					job->post_fence->semaphore, true,
+				nvgpu_semaphore_reset(
+					job->post_fence->semaphore,
 					ch->hw_sema);
 				released_job_semaphore = true;
 			}
@@ -228,8 +228,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 				&ch->joblist.dynamic.jobs,
 				channel_gk20a_job, list) {
 			if (job->post_fence->semaphore) {
-				__nvgpu_semaphore_release(
-					job->post_fence->semaphore, true,
+				nvgpu_semaphore_reset(
+					job->post_fence->semaphore,
 					ch->hw_sema);
 				released_job_semaphore = true;
 			}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index e94a112f..d87b8661 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -410,7 +410,7 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
 	 * incr the underlying sema next_value.
 	 */
 	if (!acquire)
-		nvgpu_semaphore_incr(s, c->hw_sema);
+		nvgpu_semaphore_prepare(s, c->hw_sema);
 
 	/* semaphore_a */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 771cc7e1..373c5745 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -276,37 +276,18 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
 }
 
 /*
- * If @force is set then this will not wait for the underlying semaphore to
- * catch up to the passed semaphore threshold.
+ * Fast-forward the hw sema to the threshold represented by sema_thresh.
  */
-static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
-					     bool force,
+static inline void nvgpu_semaphore_reset(struct nvgpu_semaphore *sema_thresh,
 					     struct nvgpu_semaphore_int *hw_sema)
 {
 	u32 current_val;
-	u32 threshold = nvgpu_semaphore_get_value(s);
-	int attempts = 0;
+	u32 threshold = nvgpu_semaphore_get_value(sema_thresh);
 
-	/*
-	 * Wait until the sema value is 1 less than the write value. That
-	 * way this function is essentially an increment.
-	 *
-	 * TODO: tune the wait a little better.
-	 */
-	while (!__nvgpu_semaphore_value_released(threshold - 1,
-			current_val = nvgpu_semaphore_read(s))) {
-		if (force)
-			break;
-		nvgpu_msleep(100);
-		attempts += 1;
-		if (attempts > 100) {
-			WARN(1, "Stall on sema release!");
-			return;
-		}
-	}
+	current_val = nvgpu_semaphore_read(sema_thresh);
 
 	/*
-	 * If the semaphore has already passed the value we would write then
+	 * If the semaphore has already reached the value we would write then
 	 * this is really just a NO-OP.
 	 */
 	if (__nvgpu_semaphore_value_released(threshold, current_val))
@@ -315,36 +296,28 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
 	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
 			hw_sema->location.offset, threshold);
 
-	gpu_sema_verbose_dbg(hw_sema->location.pool->sema_sea->gk20a,
-			     "(c=%d) WRITE %u", hw_sema->ch->chid, threshold);
-}
-
-static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s,
-					   struct nvgpu_semaphore_int *hw_sema)
-{
-	__nvgpu_semaphore_release(s, false, hw_sema);
+	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
+			hw_sema->ch->chid, current_val, threshold);
 }
 
 /*
- * Configure a software based increment on this semaphore. This is useful for
- * when we want the GPU to wait on a SW event before processing a channel.
- * Another way to describe this is when the GPU needs to wait on a SW pre-fence.
- * The pre-fence signals SW which in turn calls nvgpu_semaphore_release() which
- * then allows the GPU to continue.
- *
- * Also used to prep a semaphore for an INCR by the GPU.
+ * Update the nvgpu-tracked shadow of the value in "hw_sema" and store the new
+ * threshold in "s", which represents the increment the caller must write in a
+ * pushbuf. The same nvgpu_semaphore will also represent an output fence; when
+ * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
  */
-static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s,
+static inline void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
 		struct nvgpu_semaphore_int *hw_sema)
 {
-	BUG_ON(s->incremented);
+	int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
+
+	/* "s" should be an uninitialized sema. */
+	WARN_ON(s->incremented);
 
-	nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &hw_sema->next_value));
+	nvgpu_atomic_set(&s->value, next);
 	s->incremented = 1;
 
-	gpu_sema_verbose_dbg(s->g,
-			     "INCR sema for c=%d (%u)",
-			     hw_sema->ch->chid,
-			     nvgpu_atomic_read(&hw_sema->next_value));
+	gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
+			     hw_sema->ch->chid, next);
 }
 #endif
-- 
cgit v1.2.2