author     Konsta Holtta <kholtta@nvidia.com>                   2018-08-22 08:36:04 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-08-24 17:57:47 -0400
commit     cee984b00600e02d702a0b5f88f024627a76de23 (patch)
tree       9197a1c19d9d9b512b31cc7f70f615c7af66f0fd /drivers/gpu/nvgpu/common
parent     390185200fd505fb0b36bed993d67f1a21f47c24 (diff)
gpu: nvgpu: move semaphore code from header
Move the inlined semaphore functions from semaphore.h to semaphore.c. Some
of those have dependencies that we'd like to avoid in the header file, and
none of them really need to be inline.

Jira NVGPU-967

Change-Id: I40770b924c6eb54034c31e1a99b83daaa9e6a4b7
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1804610
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
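As a rough illustration of the pattern applied here (a minimal sketch with hypothetical names and a simplified body, not the actual nvgpu sources), a helper that used to be defined static inline in the header keeps only its declaration there, while the definition, together with any heavier includes it needs, moves into the .c file:

/* example_sema.h -- hypothetical header; after the move it only declares the helper */
#ifndef EXAMPLE_SEMA_H
#define EXAMPLE_SEMA_H

#include <stdint.h>

struct example_sema {
	uint64_t pool_base_va;
	uint32_t offset;
};

/* Previously defined here as: static inline uint64_t example_sema_va(...) { ... } */
uint64_t example_sema_va(const struct example_sema *s);

#endif /* EXAMPLE_SEMA_H */

/* example_sema.c -- the definition (and its dependencies) now lives here */
#include "example_sema.h"

uint64_t example_sema_va(const struct example_sema *s)
{
	return s->pool_base_va + s->offset;
}

Callers are unaffected apart from the extra function call, and the header no longer has to pull in the helper's dependencies.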
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--  drivers/gpu/nvgpu/common/semaphore.c  139
1 file changed, 139 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index 25bd3be3..5814a737 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -28,6 +28,7 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/sizes.h>
+#include <nvgpu/channel.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
@@ -485,3 +486,141 @@ void nvgpu_semaphore_get(struct nvgpu_semaphore *s)
 {
 	nvgpu_ref_get(&s->ref);
 }
+
+/*
+ * Return the address of a specific semaphore.
+ *
+ * Don't call this on a semaphore you don't own - the VA returned will make no
+ * sense in your specific channel's VM.
+ */
+u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
+{
+	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
+		s->location.offset;
+}
+
+/*
+ * Get the global RO address for the semaphore. Can be called on any semaphore
+ * regardless of whether you own it.
+ */
+u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
+{
+	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
+		s->location.offset;
+}
+
+u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
+{
+	return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
+		hw_sema->location.offset;
+}
+
+u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
+{
+	return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+			hw_sema->location.offset);
+}
+
+/*
+ * Read the underlying value from a semaphore.
+ */
+u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
+{
+	return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
+			s->location.offset);
+}
+
+/*
+ * Check if "racer" is over "goal" with wraparound handling.
+ */
+static bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
+{
+	/*
+	 * Handle wraparound with the same heuristic as the hardware does:
+	 * although the integer will eventually wrap around, consider a sema
+	 * released against a threshold if its value has passed that threshold
+	 * but has not wrapped over half of the u32 range over that threshold;
+	 * such wrapping is unlikely to happen during a sema lifetime.
+	 *
+	 * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
+	 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
+	 * then it needs 0x80000000 increments to wrap again and signal.
+	 *
+	 * Unsigned arithmetic is used because it's well-defined. This is
+	 * effectively the same as: signed_racer - signed_goal > 0.
+	 */
+
+	return racer - goal < 0x80000000;
+}
+
+u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
+{
+	return (u32)nvgpu_atomic_read(&s->value);
+}
+
+bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
+{
+	u32 sema_val = nvgpu_semaphore_read(s);
+	u32 wait_payload = nvgpu_semaphore_get_value(s);
+
+	return __nvgpu_semaphore_value_released(wait_payload, sema_val);
+}
+
+bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
+{
+	return !nvgpu_semaphore_is_released(s);
+}
+
+/*
+ * Fast-forward the hw sema to its tracked max value.
+ *
+ * Return true if the sema wasn't at the max value and needed updating, false
+ * otherwise.
+ */
+bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
+{
+	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
+	u32 current_val = __nvgpu_semaphore_read(hw_sema);
+
+	/*
+	 * If the semaphore has already reached the value we would write then
+	 * this is really just a NO-OP. However, the sema value shouldn't be
+	 * more than what we expect to be the max.
+	 */
+
+	if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
+			current_val)))
+		return false;
+
+	if (current_val == threshold)
+		return false;
+
+	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+			hw_sema->location.offset, threshold);
+
+	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
+			hw_sema->ch->chid, current_val, threshold);
+
+	return true;
+}
+
+/*
+ * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold
+ * value to "s" which represents the increment that the caller must write in a
+ * pushbuf. The same nvgpu_semaphore will also represent an output fence; when
+ * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
+ */
+void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
+		struct nvgpu_semaphore_int *hw_sema)
+{
+	int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
+
+	/* "s" should be an uninitialized sema. */
+	WARN_ON(s->incremented);
+
+	nvgpu_atomic_set(&s->value, next);
+	s->incremented = 1;
+
+	gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
+			hw_sema->ch->chid, next);
+}
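For readers skimming the diff, the least obvious part is the wraparound heuristic in __nvgpu_semaphore_value_released(). The following standalone sketch (plain C with stdint types, independent of the nvgpu structs and written only for illustration) exercises the same unsigned comparison around the 32-bit wrap:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Same comparison as the driver helper: "current" has released "goal" if it
 * falls within the half-range [goal, goal + 0x7fffffff], modulo 2^32. */
static bool value_released(uint32_t goal, uint32_t current)
{
	return current - goal < 0x80000000u;
}

int main(void)
{
	assert(value_released(5, 5));                 /* exactly at the threshold */
	assert(value_released(5, 6));                 /* past the threshold */
	assert(!value_released(5, 4));                /* not yet reached */
	assert(value_released(0xfffffffeu, 1));       /* passed it across the u32 wrap */
	assert(!value_released(5, 5 + 0x80000000u));  /* half the range away: not released */
	return 0;
}

This is the comparison nvgpu_semaphore_is_released() performs, with the threshold recorded by nvgpu_semaphore_prepare() as the goal and the payload read from the pool's rw_mem as the racer.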