2 files changed, 150 insertions, 137 deletions
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index 25bd3be3..5814a737 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -28,6 +28,7 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/sizes.h>
+#include <nvgpu/channel.h>
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
@@ -485,3 +486,141 @@ void nvgpu_semaphore_get(struct nvgpu_semaphore *s)
 {
        nvgpu_ref_get(&s->ref);
 }
+/*
+ * Return the address of a specific semaphore: the pool address obtained from
+ * __nvgpu_semaphore_pool_gpu_va() (second argument false) plus the
+ * semaphore's offset within that pool.
+ *
+ * Don't call this on a semaphore you don't own - the VA returned will make no
+ * sense in your specific channel's VM.
+ *
+ * NOTE(review): the false argument presumably selects the pool's read-write
+ * mapping (nvgpu_semaphore_gpu_ro_va() passes true) - confirm against
+ * __nvgpu_semaphore_pool_gpu_va().
+ */
+u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
+{
+        return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
+                s->location.offset;
+}
+/*
+ * Get the global RO address for the semaphore. Can be called on any semaphore
+ * regardless of whether you own it.
+ *
+ * The result is the pool address returned by __nvgpu_semaphore_pool_gpu_va()
+ * (second argument true) plus the semaphore's offset within that pool.
+ */
+u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
+{
+        return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
+                s->location.offset;
+}
+/*
+ * Return the address of a HW semaphore: the pool address obtained from
+ * __nvgpu_semaphore_pool_gpu_va() with its second argument set to true (the
+ * same flag value nvgpu_semaphore_gpu_ro_va() passes), plus the HW
+ * semaphore's offset within that pool.
+ */
+u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
+{
+        return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
+                hw_sema->location.offset;
+}
+/*
+ * Read the value of a HW semaphore: calls nvgpu_mem_rd() on the rw_mem of the
+ * HW semaphore's pool at the HW semaphore's offset. The first argument to
+ * nvgpu_mem_rd() is taken from hw_sema->ch->g.
+ */
+u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
+{
+        return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+                        hw_sema->location.offset);
+}
+/*
+ * Read the underlying value from a semaphore.
+ *
+ * Calls nvgpu_mem_rd() on the rw_mem of the semaphore's pool at the
+ * semaphore's offset, passing s->g as the first argument.
+ */
+u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
+{
+        return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
+                        s->location.offset);
+}
+/*
+ * Check if "racer" is over "goal" with wraparound handling.
+ *
+ * Returns true when racer has reached or passed goal, i.e. when the unsigned
+ * difference (racer - goal) is less than 0x80000000; false otherwise.
+ */
+static bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
+{
+        /*
+         * Handle wraparound with the same heuristic as the hardware does:
+         * although the integer will eventually wrap around, consider a sema
+         * released against a threshold if its value has passed that threshold
+         * but has not wrapped over half of the u32 range over that threshold;
+         * such wrapping is unlikely to happen during a sema lifetime.
+         *
+         * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
+         * precisely half of the 32-bit space. If racer == goal + 0x80000000,
+         * then it needs 0x80000000 increments to wrap again and signal.
+         *
+         * Unsigned arithmetic is used because it's well-defined. This is
+         * effectively the same as: signed_racer - signed_goal >= 0 (note that
+         * racer == goal already counts as released).
+         */
+        return racer - goal < 0x80000000;
+}
+/*
+ * Return the value tracked in "s" (the atomic s->value), cast to u32. This is
+ * the threshold that nvgpu_semaphore_is_released() compares the value read
+ * from the semaphore's memory against.
+ */
+u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
+{
+        return (u32)nvgpu_atomic_read(&s->value);
+}
+/*
+ * Report whether "s" has been released: the value read for the semaphore has
+ * reached or passed (with wraparound handling) the value tracked in "s".
+ */
+bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
+{
+        /* Read the semaphore's memory first, then the tracked value. */
+        u32 current_val = nvgpu_semaphore_read(s);
+        u32 threshold = nvgpu_semaphore_get_value(s);
+        bool released = __nvgpu_semaphore_value_released(threshold,
+                                                         current_val);
+        return released;
+}
+/*
+ * Report whether "s" is still acquired, i.e. nvgpu_semaphore_is_released()
+ * does not yet hold for it.
+ */
+bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
+{
+        if (nvgpu_semaphore_is_released(s))
+                return false;
+        return true;
+}
+/*
+ * Fast-forward the hw sema to its tracked max value.
+ *
+ * The tracked max is the current value of hw_sema->next_value. If the value
+ * read from the semaphore's memory is already beyond that max (checked
+ * against threshold + 1 with wraparound handling), a warning is raised and
+ * nothing is written.
+ *
+ * Return true if the sema wasn't at the max value and needed updating, false
+ * otherwise.
+ */
+bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
+{
+        u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
+        u32 current_val = __nvgpu_semaphore_read(hw_sema);
+        /*
+         * If the semaphore has already reached the value we would write then
+         * this is really just a NO-OP. However, the sema value shouldn't be
+         * more than what we expect to be the max.
+         */
+        if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
+                                                     current_val)))
+                return false;
+        if (current_val == threshold)
+                return false;
+        nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+                        hw_sema->location.offset, threshold);
+        gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
+                        hw_sema->ch->chid, current_val, threshold);
+        return true;
+}
+/*
+ * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold
+ * value to "s" which represents the increment that the caller must write in a
+ * pushbuf. The same nvgpu_semaphore will also represent an output fence; when
+ * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
+ *
+ * Adds 1 to hw_sema->next_value via nvgpu_atomic_add_return(), stores the
+ * result in s->value and sets s->incremented. Warns if "s" was already marked
+ * as incremented.
+ */
+void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
+                struct nvgpu_semaphore_int *hw_sema)
+{
+        int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
+        /* "s" should be an uninitialized sema. */
+        WARN_ON(s->incremented);
+        nvgpu_atomic_set(&s->value, next);
+        s->incremented = 1;
+        /*
+         * The value is printed with %u, so pass it as u32 - the same view of
+         * the tracked value that nvgpu_semaphore_get_value() returns.
+         */
+        gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
+                             hw_sema->ch->chid, (u32)next);
+}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 9ab6cc67..a7bd79cb 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -24,16 +24,13 @@
 #define SEMAPHORE_GK20A_H
 #include <nvgpu/log.h>
-#include <nvgpu/timers.h>
 #include <nvgpu/atomic.h>
-#include <nvgpu/bug.h>
 #include <nvgpu/kref.h>
 #include <nvgpu/list.h>
 #include <nvgpu/nvgpu_mem.h>
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
-#include "gk20a/channel_gk20a.h"
 #define gpu_sema_dbg(g, fmt, args...)           \
        nvgpu_log(g, gpu_dbg_sema, fmt, ##args)
@@ -191,141 +188,18 @@ void nvgpu_semaphore_put(struct nvgpu_semaphore *s);
 void nvgpu_semaphore_get(struct nvgpu_semaphore *s);
 void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch);
-/*
+u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s);
- * Return the address of a specific semaphore.
+u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s);
- *
+u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema);
- * Don't call this on a semaphore you don't own - the VA returned will make no
- * sense in your specific channel's VM.
- */
-static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
-{
-        return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
-                s->location.offset;
-}
-/*
- * Get the global RO address for the semaphore. Can be called on any semaphore
- * regardless of whether you own it.
- */
-static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
-{
-        return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
-                s->location.offset;
-}
-static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
-{
-        return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
-                hw_sema->location.offset;
-}
-static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
-{
-        return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
-                        hw_sema->location.offset);
-}
-/*
- * Read the underlying value from a semaphore.
- */
-static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
-{
-        return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
-                        s->location.offset);
-}
-/*
- * Check if "racer" is over "goal" with wraparound handling.
- */
-static inline bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
-{
-        /*
-         * Handle wraparound with the same heuristic as the hardware does:
-         * although the integer will eventually wrap around, consider a sema
-         * released against a threshold if its value has passed that threshold
-         * but has not wrapped over half of the u32 range over that threshold;
-         * such wrapping is unlikely to happen during a sema lifetime.
-         *
-         * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
-         * precisely half of the 32-bit space. If racer == goal + 0x80000000,
-         * then it needs 0x80000000 increments to wrap again and signal.
-         *
-         * Unsigned arithmetic is used because it's well-defined. This is
-         * effectively the same as: signed_racer - signed_goal > 0.
-         */
-        return racer - goal < 0x80000000;
-}
-static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
-{
-        return (u32)nvgpu_atomic_read(&s->value);
-}
-static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
-{
-        u32 sema_val = nvgpu_semaphore_read(s);
-        u32 wait_payload = nvgpu_semaphore_get_value(s);
-        return __nvgpu_semaphore_value_released(wait_payload, sema_val);
-}
-static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
-{
-        return !nvgpu_semaphore_is_released(s);
-}
-/*
- * Fast-forward the hw sema to its tracked max value.
- *
- * Return true if the sema wasn't at the max value and needed updating, false
- * otherwise.
- */
-static inline bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
-{
-        u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
-        u32 current_val = __nvgpu_semaphore_read(hw_sema);
-        /*
-         * If the semaphore has already reached the value we would write then
-         * this is really just a NO-OP. However, the sema value shouldn't be
-         * more than what we expect to be the max.
-         */
-        if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
-                                                     current_val)))
-                return false;
-        if (current_val == threshold)
-                return false;
-        nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
-                        hw_sema->location.offset, threshold);
-        gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
-                        hw_sema->ch->chid, current_val, threshold);
-        return true;
-}
-/*
- * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold
- * value to "s" which represents the increment that the caller must write in a
- * pushbuf. The same nvgpu_semaphore will also represent an output fence; when
- * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
- */
-static inline void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
-                struct nvgpu_semaphore_int *hw_sema)
-{
-        int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
-        /* "s" should be an uninitialized sema. */
+u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema);
-        WARN_ON(s->incremented);
+u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s);
+u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s);
+bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s);
+bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s);
-        nvgpu_atomic_set(&s->value, next);
+bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema);
-        s->incremented = 1;
+void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
+                struct nvgpu_semaphore_int *hw_sema);
-        gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
-                             hw_sema->ch->chid, next);
-}
 #endif