From cee984b00600e02d702a0b5f88f024627a76de23 Mon Sep 17 00:00:00 2001
From: Konsta Holtta
Date: Wed, 22 Aug 2018 15:36:04 +0300
Subject: gpu: nvgpu: move semaphore code from header

Move the inlined semaphore functions from semaphore.h to semaphore.c.
Some of those have dependencies that we'd like to avoid in the header
file, and none of them really need to be inline.

Jira NVGPU-967

Change-Id: I40770b924c6eb54034c31e1a99b83daaa9e6a4b7
Signed-off-by: Konsta Holtta
Reviewed-on: https://git-master.nvidia.com/r/1804610
Reviewed-by: svc-misra-checker
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/semaphore.c        | 139 ++++++++++++++++++++++++++
 drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 148 +++------------------------
 2 files changed, 150 insertions(+), 137 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index 25bd3be3..5814a737 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
@@ -485,3 +486,141 @@ void nvgpu_semaphore_get(struct nvgpu_semaphore *s)
 {
 	nvgpu_ref_get(&s->ref);
 }
+
+/*
+ * Return the address of a specific semaphore.
+ *
+ * Don't call this on a semaphore you don't own - the VA returned will make no
+ * sense in your specific channel's VM.
+ */
+u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
+{
+	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
+		s->location.offset;
+}
+
+/*
+ * Get the global RO address for the semaphore. Can be called on any semaphore
+ * regardless of whether you own it.
+ */
+u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
+{
+	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
+		s->location.offset;
+}
+
+u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
+{
+	return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
+		hw_sema->location.offset;
+}
+
+u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
+{
+	return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+			hw_sema->location.offset);
+}
+
+/*
+ * Read the underlying value from a semaphore.
+ */
+u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
+{
+	return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
+			s->location.offset);
+}
+
+/*
+ * Check if "racer" is over "goal" with wraparound handling.
+ */
+static bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
+{
+	/*
+	 * Handle wraparound with the same heuristic as the hardware does:
+	 * although the integer will eventually wrap around, consider a sema
+	 * released against a threshold if its value has passed that threshold
+	 * but has not wrapped over half of the u32 range over that threshold;
+	 * such wrapping is unlikely to happen during a sema lifetime.
+	 *
+	 * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
+	 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
+	 * then it needs 0x80000000 increments to wrap again and signal.
+	 *
+	 * Unsigned arithmetic is used because it's well-defined. This is
+	 * effectively the same as: signed_racer - signed_goal > 0.
+	 */
+
+	return racer - goal < 0x80000000;
+}
+
+u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
+{
+	return (u32)nvgpu_atomic_read(&s->value);
+}
+
+bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
+{
+	u32 sema_val = nvgpu_semaphore_read(s);
+	u32 wait_payload = nvgpu_semaphore_get_value(s);
+
+	return __nvgpu_semaphore_value_released(wait_payload, sema_val);
+}
+
+bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
+{
+	return !nvgpu_semaphore_is_released(s);
+}
+
+/*
+ * Fast-forward the hw sema to its tracked max value.
+ *
+ * Return true if the sema wasn't at the max value and needed updating, false
+ * otherwise.
+ */
+bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
+{
+	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
+	u32 current_val = __nvgpu_semaphore_read(hw_sema);
+
+	/*
+	 * If the semaphore has already reached the value we would write then
+	 * this is really just a NO-OP. However, the sema value shouldn't be
+	 * more than what we expect to be the max.
+	 */
+
+	if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
+			current_val)))
+		return false;
+
+	if (current_val == threshold)
+		return false;
+
+	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+			hw_sema->location.offset, threshold);
+
+	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
+			hw_sema->ch->chid, current_val, threshold);
+
+	return true;
+}
+
+/*
+ * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold
+ * value to "s" which represents the increment that the caller must write in a
+ * pushbuf. The same nvgpu_semaphore will also represent an output fence; when
+ * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
+ */
+void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
+		struct nvgpu_semaphore_int *hw_sema)
+{
+	int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
+
+	/* "s" should be an uninitialized sema. */
+	WARN_ON(s->incremented);
+
+	nvgpu_atomic_set(&s->value, next);
+	s->incremented = 1;
+
+	gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
+			hw_sema->ch->chid, next);
+}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 9ab6cc67..a7bd79cb 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -24,16 +24,13 @@
 #define SEMAPHORE_GK20A_H
 
 #include
-#include
 #include
-#include
 #include
 #include
 #include
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
-#include "gk20a/channel_gk20a.h"
 
 #define gpu_sema_dbg(g, fmt, args...) \
 	nvgpu_log(g, gpu_dbg_sema, fmt, ##args)
@@ -191,141 +188,18 @@ void nvgpu_semaphore_put(struct nvgpu_semaphore *s);
 void nvgpu_semaphore_get(struct nvgpu_semaphore *s);
 void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch);
 
-/*
- * Return the address of a specific semaphore.
- *
- * Don't call this on a semaphore you don't own - the VA returned will make no
- * sense in your specific channel's VM.
- */
-static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
-{
-	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
-		s->location.offset;
-}
-
-/*
- * Get the global RO address for the semaphore. Can be called on any semaphore
- * regardless of whether you own it.
- */
-static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
-{
-	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
-		s->location.offset;
-}
-
-static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
-{
-	return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
-		hw_sema->location.offset;
-}
-
-static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
-{
-	return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
-			hw_sema->location.offset);
-}
-
-/*
- * Read the underlying value from a semaphore.
- */
-static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
-{
-	return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
-			s->location.offset);
-}
-
-/*
- * Check if "racer" is over "goal" with wraparound handling.
- */
-static inline bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
-{
-	/*
-	 * Handle wraparound with the same heuristic as the hardware does:
-	 * although the integer will eventually wrap around, consider a sema
-	 * released against a threshold if its value has passed that threshold
-	 * but has not wrapped over half of the u32 range over that threshold;
-	 * such wrapping is unlikely to happen during a sema lifetime.
-	 *
-	 * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
-	 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
-	 * then it needs 0x80000000 increments to wrap again and signal.
-	 *
-	 * Unsigned arithmetic is used because it's well-defined. This is
-	 * effectively the same as: signed_racer - signed_goal > 0.
-	 */
-
-	return racer - goal < 0x80000000;
-}
-
-static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
-{
-	return (u32)nvgpu_atomic_read(&s->value);
-}
-
-static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
-{
-	u32 sema_val = nvgpu_semaphore_read(s);
-	u32 wait_payload = nvgpu_semaphore_get_value(s);
-
-	return __nvgpu_semaphore_value_released(wait_payload, sema_val);
-}
-
-static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
-{
-	return !nvgpu_semaphore_is_released(s);
-}
-
-/*
- * Fast-forward the hw sema to its tracked max value.
- *
- * Return true if the sema wasn't at the max value and needed updating, false
- * otherwise.
- */
-static inline bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
-{
-	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
-	u32 current_val = __nvgpu_semaphore_read(hw_sema);
-
-	/*
-	 * If the semaphore has already reached the value we would write then
-	 * this is really just a NO-OP. However, the sema value shouldn't be
-	 * more than what we expect to be the max.
-	 */
-
-	if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
-			current_val)))
-		return false;
-
-	if (current_val == threshold)
-		return false;
-
-	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
-			hw_sema->location.offset, threshold);
-
-	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
-			hw_sema->ch->chid, current_val, threshold);
-
-	return true;
-}
-
-/*
- * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold
- * value to "s" which represents the increment that the caller must write in a
- * pushbuf. The same nvgpu_semaphore will also represent an output fence; when
- * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
- */
-static inline void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
-		struct nvgpu_semaphore_int *hw_sema)
-{
-	int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
+u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s);
+u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s);
+u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema);
 
-	/* "s" should be an uninitialized sema. */
-	WARN_ON(s->incremented);
+u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema);
+u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s);
+u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s);
+bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s);
+bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s);
 
-	nvgpu_atomic_set(&s->value, next);
-	s->incremented = 1;
+bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema);
+void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
+		struct nvgpu_semaphore_int *hw_sema);
 
-	gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
-			hw_sema->ch->chid, next);
-}
 #endif
-- 
cgit v1.2.2
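
The unsigned wraparound check documented in __nvgpu_semaphore_value_released() above is subtle enough to deserve a worked example. The following standalone sketch is not part of the patch: the helper name value_released() and the sample thresholds are illustrative only, and standard C99 types stand in for the driver's u32 typedef.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * Same predicate as the kernel helper: "racer" counts as released against
 * "goal" iff it lies in [goal, goal + 0x7fffffff] modulo 2^32.
 */
static bool value_released(uint32_t goal, uint32_t racer)
{
	return (uint32_t)(racer - goal) < 0x80000000u;
}

int main(void)
{
	/* The semaphore has reached or passed the threshold. */
	assert(value_released(5u, 5u));
	assert(value_released(5u, 6u));

	/* Still behind the threshold: not released. */
	assert(!value_released(5u, 4u));

	/* Wraparound: 0x00000002 counts as being past 0xfffffffe. */
	assert(value_released(0xfffffffeu, 0x00000002u));

	/* Exactly half the u32 range ahead is treated as not released. */
	assert(!value_released(5u, 5u + 0x80000000u));

	return 0;
}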
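In the same spirit, the split of duties between nvgpu_semaphore_prepare(), which reserves a new threshold in next_value, and nvgpu_semaphore_reset(), which fast-forwards the semaphore memory to that tracked maximum, can be modelled with plain variables. This is an illustrative simplification, not nvgpu code: the fake_* names are hypothetical, and the atomics, channel and pool plumbing of the real driver (as well as its WARN_ON sanity check) are deliberately omitted.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct fake_hw_sema {
	uint32_t next_value;	/* software-tracked max (nvgpu_atomic_t in nvgpu) */
	uint32_t mem_value;	/* value held in the semaphore pool's rw_mem */
};

/* Mirrors nvgpu_semaphore_prepare(): reserve the next threshold. */
static uint32_t fake_prepare(struct fake_hw_sema *s)
{
	return ++s->next_value;	/* threshold the pushbuf increment would release */
}

/* Mirrors nvgpu_semaphore_reset(): fast-forward memory to the tracked max. */
static bool fake_reset(struct fake_hw_sema *s)
{
	if (s->mem_value == s->next_value)
		return false;	/* already at the max: nothing to do */

	s->mem_value = s->next_value;
	return true;
}

int main(void)
{
	struct fake_hw_sema sema = { 0 };
	uint32_t t1 = fake_prepare(&sema);	/* threshold 1 */
	uint32_t t2 = fake_prepare(&sema);	/* threshold 2 */

	assert(t1 == 1 && t2 == 2);

	/* The GPU never wrote the increments, so reset has work to do. */
	assert(fake_reset(&sema) && sema.mem_value == 2);

	/* A second reset is a no-op, as in the real code. */
	assert(!fake_reset(&sema));

	return 0;
}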