From 4320877eb49ef16a5620536427f77de44bee12b7 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Thu, 1 Feb 2018 17:10:55 +0200
Subject: gpu: nvgpu: handle semaphore wraparound

Compare gpu semaphores in the kernel in the same way as the hardware
does: a semaphore is released if its value is at or past the threshold,
but by less than half of the u32 range. This makes it possible to skip
zeroing the sema values when semas are allocated, so that they'd be just
monotonically increasing numbers like syncpoints are.

Jira NVGPU-514

Change-Id: I3bae352fbacfe9690666765b9ecdeae6f0813ea1
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1652086
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 54 +++++++++++++++++++----------
 1 file changed, 36 insertions(+), 18 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index a4af1ca3..9c74d300 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -230,23 +230,26 @@ static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
 }
 
 /*
- * TODO: handle wrap around... Hmm, how to do this?
+ * Check if "racer" has reached or passed "goal", with wraparound handling.
  */
-static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
+static inline bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
 {
-	u32 sema_val = nvgpu_semaphore_read(s);
-
 	/*
-	 * If the underlying semaphore value is greater than or equal to
-	 * the value of the semaphore then the semaphore has been signaled
-	 * (a.k.a. released).
+	 * Handle wraparound with the same heuristic as the hardware does:
+	 * although the integer will eventually wrap around, consider a sema
+	 * released against a threshold if its value has passed that threshold
+	 * but has not wrapped over half of the u32 range over that threshold;
+	 * such wrapping is unlikely to happen during a sema lifetime.
+	 *
+	 * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
+	 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
+	 * then it needs 0x80000000 increments to wrap again and signal.
+	 *
+	 * Unsigned arithmetic is used because it's well-defined. This is
+	 * effectively the same as: signed_racer - signed_goal >= 0.
 	 */
-	return (int)sema_val >= nvgpu_atomic_read(&s->value);
-}
 
-static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
-{
-	return !nvgpu_semaphore_is_released(s);
+	return racer - goal < 0x80000000;
 }
 
 static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
@@ -254,6 +257,19 @@ static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
 	return (u32)nvgpu_atomic_read(&s->value);
 }
 
+static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
+{
+	u32 sema_val = nvgpu_semaphore_read(s);
+	u32 wait_payload = nvgpu_semaphore_get_value(s);
+
+	return __nvgpu_semaphore_value_released(wait_payload, sema_val);
+}
+
+static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
+{
+	return !nvgpu_semaphore_is_released(s);
+}
+
 static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s)
 {
 	return (u32)nvgpu_atomic_read(&s->hw_sema->next_value);
@@ -261,14 +277,14 @@ static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s)
 
 /*
  * If @force is set then this will not wait for the underlying semaphore to
- * catch up to the passed semaphore.
+ * catch up to the passed semaphore threshold.
  */
 static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
 					     bool force)
 {
 	struct nvgpu_semaphore_int *hw_sema = s->hw_sema;
 	u32 current_val;
-	u32 val = nvgpu_semaphore_get_value(s);
+	u32 threshold = nvgpu_semaphore_get_value(s);
 	int attempts = 0;
 
 	/*
@@ -277,7 +293,8 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
 	 *
 	 * TODO: tune the wait a little better.
 	 */
-	while ((current_val = nvgpu_semaphore_read(s)) < (val - 1)) {
+	while (!__nvgpu_semaphore_value_released(threshold - 1,
+			current_val = nvgpu_semaphore_read(s))) {
 		if (force)
 			break;
 		nvgpu_msleep(100);
@@ -292,13 +309,14 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
 	 * If the semaphore has already passed the value we would write then
 	 * this is really just a NO-OP.
 	 */
-	if (current_val >= val)
+	if (__nvgpu_semaphore_value_released(threshold, current_val))
 		return;
 
-	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, val);
+	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset,
+			threshold);
 
 	gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a,
-			     "(c=%d) WRITE %u", hw_sema->ch->chid, val);
+			     "(c=%d) WRITE %u", hw_sema->ch->chid, threshold);
 }
 
 static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)
-- 
cgit v1.2.2