1 files changed, 139 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index 25bd3be3..5814a737 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -28,6 +28,7 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/sizes.h>
+#include <nvgpu/channel.h>
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
@@ -485,3 +486,141 @@ void nvgpu_semaphore_get(struct nvgpu_semaphore *s)
 {
        nvgpu_ref_get(&s->ref);
 }
+/*
+ * Compute the GPU virtual address of semaphore "s": the pool VA looked up with
+ * the second argument set to false, plus the semaphore's offset in that pool.
+ *
+ * Only use this on a semaphore you own - for anyone else the returned VA makes
+ * no sense in their channel's VM.
+ */
+u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
+{
+        return s->location.offset +
+                __nvgpu_semaphore_pool_gpu_va(s->location.pool, false);
+}
+/*
+ * Compute the global read-only address of semaphore "s": the pool VA looked up
+ * with the second argument set to true, plus the semaphore's offset.
+ *
+ * Unlike the RW address, this may be used on any semaphore, owned or not.
+ */
+u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
+{
+        return s->location.offset +
+                __nvgpu_semaphore_pool_gpu_va(s->location.pool, true);
+}
+/*
+ * Compute the address of a HW semaphore: the VA of the pool it lives in
+ * (looked up with the second argument set to true) plus its offset in the
+ * pool.
+ */
+u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
+{
+        return hw_sema->location.offset +
+                __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true);
+}
+/*
+ * Read the current value of a HW semaphore from its pool's rw_mem, at the
+ * semaphore's offset within the pool.
+ */
+u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
+{
+        return nvgpu_mem_rd(hw_sema->ch->g,
+                            &hw_sema->location.pool->rw_mem,
+                            hw_sema->location.offset);
+}
+/*
+ * Fetch the value currently stored in memory for semaphore "s", i.e. the word
+ * at the semaphore's offset in its pool's rw_mem.
+ */
+u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
+{
+        return nvgpu_mem_rd(s->g,
+                            &s->location.pool->rw_mem,
+                            s->location.offset);
+}
+/*
+ * Check if "racer" has reached or passed "goal", with wraparound handling.
+ *
+ * Returns true when racer lies in [goal, goal + 0x7fffffff] modulo 2^32 and
+ * false otherwise. Note that racer == goal counts as released.
+ */
+static bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
+{
+        /*
+         * Handle wraparound with the same heuristic as the hardware does:
+         * although the integer will eventually wrap around, consider a sema
+         * released against a threshold if its value has passed that threshold
+         * but has not wrapped over half of the u32 range over that threshold;
+         * such wrapping is unlikely to happen during a sema lifetime.
+         *
+         * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
+         * precisely half of the 32-bit space. If racer == goal + 0x80000000,
+         * then it needs 0x80000000 increments to wrap again and signal.
+         *
+         * Unsigned arithmetic is used because it's well-defined. This is
+         * effectively the same as: signed_racer - signed_goal >= 0.
+         */
+        return (racer - goal) < 0x80000000U;
+}
+/*
+ * Return the value held in the atomic s->value, converted to u32.
+ */
+u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
+{
+        u32 payload = (u32)nvgpu_atomic_read(&s->value);
+        return payload;
+}
+/*
+ * Report whether semaphore "s" has been released: the value read back from
+ * semaphore memory has reached or passed (with wraparound handling) the
+ * value tracked in "s".
+ */
+bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
+{
+        /* Read the memory value first, then the tracked goal. */
+        u32 hw_val = nvgpu_semaphore_read(s);
+        u32 goal = nvgpu_semaphore_get_value(s);
+        return __nvgpu_semaphore_value_released(goal, hw_val);
+}
+/*
+ * Logical inverse of nvgpu_semaphore_is_released(): true while "s" has not
+ * yet been released.
+ */
+bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
+{
+        bool released = nvgpu_semaphore_is_released(s);
+        return !released;
+}
+/*
+ * Fast-forward the hw sema to its tracked max value (hw_sema->next_value).
+ *
+ * Returns true if the sema was behind the max value and has been written,
+ * false if nothing was written (already at the max, or unexpectedly past it).
+ */
+bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
+{
+        u32 target = (u32)nvgpu_atomic_read(&hw_sema->next_value);
+        u32 hw_val = __nvgpu_semaphore_read(hw_sema);
+        /*
+         * The sema value shouldn't be more than what we expect to be the
+         * max; warn and leave it untouched if it is.
+         */
+        if (WARN_ON(__nvgpu_semaphore_value_released(target + 1, hw_val)))
+                return false;
+        /*
+         * If the semaphore has already reached the value we would write then
+         * this is really just a NO-OP.
+         */
+        if (hw_val == target)
+                return false;
+        nvgpu_mem_wr(hw_sema->ch->g,
+                     &hw_sema->location.pool->rw_mem,
+                     hw_sema->location.offset,
+                     target);
+        gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
+                             hw_sema->ch->chid, hw_val, target);
+        return true;
+}
+/*
+ * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold
+ * value to "s" which represents the increment that the caller must write in a
+ * pushbuf. The same nvgpu_semaphore will also represent an output fence; when
+ * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
+ *
+ * "s" is expected to be a sema that has not been prepared before; a warning
+ * is raised if s->incremented is already set.
+ */
+void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
+                struct nvgpu_semaphore_int *hw_sema)
+{
+        /* Atomically bump the tracked max; the new value is our threshold. */
+        int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
+        /* "s" should be an uninitialized sema. */
+        WARN_ON(s->incremented);
+        nvgpu_atomic_set(&s->value, next);
+        s->incremented = 1;
+        /*
+         * "next" is an int but is printed with %u; cast it so the argument
+         * type matches the conversion, the same way
+         * nvgpu_semaphore_get_value() exposes the value as u32.
+         */
+        gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
+                             hw_sema->ch->chid, (u32)next);
+}

diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c index 25bd3be3..5814a737 100644 --- a/drivers/gpu/nvgpu/common/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -28,6 +28,7 @@
28	#include <nvgpu/kmem.h>	28	#include <nvgpu/kmem.h>
29	#include <nvgpu/bug.h>	29	#include <nvgpu/bug.h>
30	#include <nvgpu/sizes.h>	30	#include <nvgpu/sizes.h>
		31	#include <nvgpu/channel.h>
31		32
32	#include "gk20a/gk20a.h"	33	#include "gk20a/gk20a.h"
33	#include "gk20a/mm_gk20a.h"	34	#include "gk20a/mm_gk20a.h"
@@ -485,3 +486,141 @@ void nvgpu_semaphore_get(struct nvgpu_semaphore *s)
485	{	486	{
486	nvgpu_ref_get(&s->ref);	487	nvgpu_ref_get(&s->ref);
487	}	488	}
		489
		490	/*
		491	* Return the address of a specific semaphore.
		492	*
		493	* Don't call this on a semaphore you don't own - the VA returned will make no
		494	* sense in your specific channel's VM.
		495	*/
		496	u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
		497	{
		498	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
		499	s->location.offset;
		500	}
		501
		502	/*
		503	* Get the global RO address for the semaphore. Can be called on any semaphore
		504	* regardless of whether you own it.
		505	*/
		506	u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
		507	{
		508	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
		509	s->location.offset;
		510	}
		511
		512	u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
		513	{
		514	return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
		515	hw_sema->location.offset;
		516	}
		517
		518	u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
		519	{
		520	return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
		521	hw_sema->location.offset);
		522	}
		523
		524	/*
		525	* Read the underlying value from a semaphore.
		526	*/
		527	u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
		528	{
		529	return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
		530	s->location.offset);
		531	}
		532
		533	/*
		534	* Check if "racer" is over "goal" with wraparound handling.
		535	*/
		536	static bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
		537	{
		538	/*
		539	* Handle wraparound with the same heuristic as the hardware does:
		540	* although the integer will eventually wrap around, consider a sema
		541	* released against a threshold if its value has passed that threshold
		542	* but has not wrapped over half of the u32 range over that threshold;
		543	* such wrapping is unlikely to happen during a sema lifetime.
		544	*
		545	* Values for [goal, goal + 0x7fffffff] are considered signaled; that's
		546	* precisely half of the 32-bit space. If racer == goal + 0x80000000,
		547	* then it needs 0x80000000 increments to wrap again and signal.
		548	*
		549	* Unsigned arithmetic is used because it's well-defined. This is
		550	* effectively the same as: signed_racer - signed_goal >= 0.
		551	*/
		552
		553	return racer - goal < 0x80000000;
		554	}
		555
		556	u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
		557	{
		558	return (u32)nvgpu_atomic_read(&s->value);
		559	}
		560
		561	bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
		562	{
		563	u32 sema_val = nvgpu_semaphore_read(s);
		564	u32 wait_payload = nvgpu_semaphore_get_value(s);
		565
		566	return __nvgpu_semaphore_value_released(wait_payload, sema_val);
		567	}
		568
		569	bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
		570	{
		571	return !nvgpu_semaphore_is_released(s);
		572	}
		573
		574	/*
		575	* Fast-forward the hw sema to its tracked max value.
		576	*
		577	* Return true if the sema wasn't at the max value and needed updating, false
		578	* otherwise.
		579	*/
		580	bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
		581	{
		582	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
		583	u32 current_val = __nvgpu_semaphore_read(hw_sema);
		584
		585	/*
		586	* If the semaphore has already reached the value we would write then
		587	* this is really just a NO-OP. However, the sema value shouldn't be
		588	* more than what we expect to be the max.
		589	*/
		590
		591	if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
		592	current_val)))
		593	return false;
		594
		595	if (current_val == threshold)
		596	return false;
		597
		598	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
		599	hw_sema->location.offset, threshold);
		600
		601	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
		602	hw_sema->ch->chid, current_val, threshold);
		603
		604	return true;
		605	}
		606
		607	/*
		608	* Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold
		609	* value to "s" which represents the increment that the caller must write in a
		610	* pushbuf. The same nvgpu_semaphore will also represent an output fence; when
		611	* nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
		612	*/
		613	void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
		614	struct nvgpu_semaphore_int *hw_sema)
		615	{
		616	int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
		617
		618	/* "s" should be an uninitialized sema. */
		619	WARN_ON(s->incremented);
		620
		621	nvgpu_atomic_set(&s->value, next);
		622	s->incremented = 1;
		623
		624	gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
		625	hw_sema->ch->chid, next);
		626	}