diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2018-08-22 08:36:04 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-08-24 17:57:47 -0400 |
commit | cee984b00600e02d702a0b5f88f024627a76de23 (patch) | |
tree | 9197a1c19d9d9b512b31cc7f70f615c7af66f0fd /drivers/gpu/nvgpu/include | |
parent | 390185200fd505fb0b36bed993d67f1a21f47c24 (diff) |
gpu: nvgpu: move semaphore code from header
Move the inlined semaphore functions from semaphore.h to semaphore.c.
Some of those have dependencies that we'd like to avoid in the header
file, and none of them really need to be inline.
Jira NVGPU-967
Change-Id: I40770b924c6eb54034c31e1a99b83daaa9e6a4b7
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1804610
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/include')
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 148 |
1 file changed, 11 insertions, 137 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h index 9ab6cc67..a7bd79cb 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h | |||
@@ -24,16 +24,13 @@ | |||
24 | #define SEMAPHORE_GK20A_H | 24 | #define SEMAPHORE_GK20A_H |
25 | 25 | ||
26 | #include <nvgpu/log.h> | 26 | #include <nvgpu/log.h> |
27 | #include <nvgpu/timers.h> | ||
28 | #include <nvgpu/atomic.h> | 27 | #include <nvgpu/atomic.h> |
29 | #include <nvgpu/bug.h> | ||
30 | #include <nvgpu/kref.h> | 28 | #include <nvgpu/kref.h> |
31 | #include <nvgpu/list.h> | 29 | #include <nvgpu/list.h> |
32 | #include <nvgpu/nvgpu_mem.h> | 30 | #include <nvgpu/nvgpu_mem.h> |
33 | 31 | ||
34 | #include "gk20a/gk20a.h" | 32 | #include "gk20a/gk20a.h" |
35 | #include "gk20a/mm_gk20a.h" | 33 | #include "gk20a/mm_gk20a.h" |
36 | #include "gk20a/channel_gk20a.h" | ||
37 | 34 | ||
38 | #define gpu_sema_dbg(g, fmt, args...) \ | 35 | #define gpu_sema_dbg(g, fmt, args...) \ |
39 | nvgpu_log(g, gpu_dbg_sema, fmt, ##args) | 36 | nvgpu_log(g, gpu_dbg_sema, fmt, ##args) |
@@ -191,141 +188,18 @@ void nvgpu_semaphore_put(struct nvgpu_semaphore *s); | |||
191 | void nvgpu_semaphore_get(struct nvgpu_semaphore *s); | 188 | void nvgpu_semaphore_get(struct nvgpu_semaphore *s); |
192 | void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch); | 189 | void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch); |
193 | 190 | ||
194 | /* | 191 | u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s); |
195 | * Return the address of a specific semaphore. | 192 | u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s); |
196 | * | 193 | u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema); |
197 | * Don't call this on a semaphore you don't own - the VA returned will make no | ||
198 | * sense in your specific channel's VM. | ||
199 | */ | ||
200 | static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s) | ||
201 | { | ||
202 | return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) + | ||
203 | s->location.offset; | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * Get the global RO address for the semaphore. Can be called on any semaphore | ||
208 | * regardless of whether you own it. | ||
209 | */ | ||
210 | static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s) | ||
211 | { | ||
212 | return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) + | ||
213 | s->location.offset; | ||
214 | } | ||
215 | |||
216 | static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema) | ||
217 | { | ||
218 | return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) + | ||
219 | hw_sema->location.offset; | ||
220 | } | ||
221 | |||
222 | static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema) | ||
223 | { | ||
224 | return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, | ||
225 | hw_sema->location.offset); | ||
226 | } | ||
227 | |||
228 | /* | ||
229 | * Read the underlying value from a semaphore. | ||
230 | */ | ||
231 | static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) | ||
232 | { | ||
233 | return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem, | ||
234 | s->location.offset); | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * Check if "racer" is over "goal" with wraparound handling. | ||
239 | */ | ||
240 | static inline bool __nvgpu_semaphore_value_released(u32 goal, u32 racer) | ||
241 | { | ||
242 | /* | ||
243 | * Handle wraparound with the same heuristic as the hardware does: | ||
244 | * although the integer will eventually wrap around, consider a sema | ||
245 | * released against a threshold if its value has passed that threshold | ||
246 | * but has not wrapped over half of the u32 range over that threshold; | ||
247 | * such wrapping is unlikely to happen during a sema lifetime. | ||
248 | * | ||
249 | * Values for [goal, goal + 0x7fffffff] are considered signaled; that's | ||
250 | * precisely half of the 32-bit space. If racer == goal + 0x80000000, | ||
251 | * then it needs 0x80000000 increments to wrap again and signal. | ||
252 | * | ||
253 | * Unsigned arithmetic is used because it's well-defined. This is | ||
254 | * effectively the same as: signed_racer - signed_goal > 0. | ||
255 | */ | ||
256 | |||
257 | return racer - goal < 0x80000000; | ||
258 | } | ||
259 | |||
260 | static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) | ||
261 | { | ||
262 | return (u32)nvgpu_atomic_read(&s->value); | ||
263 | } | ||
264 | |||
265 | static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) | ||
266 | { | ||
267 | u32 sema_val = nvgpu_semaphore_read(s); | ||
268 | u32 wait_payload = nvgpu_semaphore_get_value(s); | ||
269 | |||
270 | return __nvgpu_semaphore_value_released(wait_payload, sema_val); | ||
271 | } | ||
272 | |||
273 | static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) | ||
274 | { | ||
275 | return !nvgpu_semaphore_is_released(s); | ||
276 | } | ||
277 | |||
278 | /* | ||
279 | * Fast-forward the hw sema to its tracked max value. | ||
280 | * | ||
281 | * Return true if the sema wasn't at the max value and needed updating, false | ||
282 | * otherwise. | ||
283 | */ | ||
284 | static inline bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema) | ||
285 | { | ||
286 | u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value); | ||
287 | u32 current_val = __nvgpu_semaphore_read(hw_sema); | ||
288 | |||
289 | /* | ||
290 | * If the semaphore has already reached the value we would write then | ||
291 | * this is really just a NO-OP. However, the sema value shouldn't be | ||
292 | * more than what we expect to be the max. | ||
293 | */ | ||
294 | |||
295 | if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1, | ||
296 | current_val))) | ||
297 | return false; | ||
298 | |||
299 | if (current_val == threshold) | ||
300 | return false; | ||
301 | |||
302 | nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, | ||
303 | hw_sema->location.offset, threshold); | ||
304 | |||
305 | gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u", | ||
306 | hw_sema->ch->chid, current_val, threshold); | ||
307 | |||
308 | return true; | ||
309 | } | ||
310 | |||
311 | /* | ||
312 | * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold | ||
313 | * value to "s" which represents the increment that the caller must write in a | ||
314 | * pushbuf. The same nvgpu_semaphore will also represent an output fence; when | ||
315 | * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment. | ||
316 | */ | ||
317 | static inline void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, | ||
318 | struct nvgpu_semaphore_int *hw_sema) | ||
319 | { | ||
320 | int next = nvgpu_atomic_add_return(1, &hw_sema->next_value); | ||
321 | 194 | ||
322 | /* "s" should be an uninitialized sema. */ | 195 | u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema); |
323 | WARN_ON(s->incremented); | 196 | u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s); |
197 | u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s); | ||
198 | bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s); | ||
199 | bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s); | ||
324 | 200 | ||
325 | nvgpu_atomic_set(&s->value, next); | 201 | bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema); |
326 | s->incremented = 1; | 202 | void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, |
203 | struct nvgpu_semaphore_int *hw_sema); | ||
327 | 204 | ||
328 | gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)", | ||
329 | hw_sema->ch->chid, next); | ||
330 | } | ||
331 | #endif | 205 | #endif |