summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKonsta Holtta <kholtta@nvidia.com>2018-08-22 08:36:04 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-08-24 17:57:47 -0400
commitcee984b00600e02d702a0b5f88f024627a76de23 (patch)
tree9197a1c19d9d9b512b31cc7f70f615c7af66f0fd
parent390185200fd505fb0b36bed993d67f1a21f47c24 (diff)
gpu: nvgpu: move semaphore code from header
Move the inlined semaphore functions from semaphore.h to semaphore.c. Some of
those have dependencies that we'd like to avoid in the header file, and none
of them really need to be inline.

Jira NVGPU-967
Change-Id: I40770b924c6eb54034c31e1a99b83daaa9e6a4b7
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1804610
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/common/semaphore.c139
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/semaphore.h148
2 files changed, 150 insertions(+), 137 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index 25bd3be3..5814a737 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -28,6 +28,7 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/sizes.h>
+#include <nvgpu/channel.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
@@ -485,3 +486,141 @@ void nvgpu_semaphore_get(struct nvgpu_semaphore *s)
 {
 	nvgpu_ref_get(&s->ref);
 }
+
+/*
+ * Return the address of a specific semaphore.
+ *
+ * Don't call this on a semaphore you don't own - the VA returned will make no
+ * sense in your specific channel's VM.
+ */
+u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
+{
+	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
+		s->location.offset;
+}
+
+/*
+ * Get the global RO address for the semaphore. Can be called on any semaphore
+ * regardless of whether you own it.
+ */
+u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
+{
+	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
+		s->location.offset;
+}
+
+u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
+{
+	return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
+		hw_sema->location.offset;
+}
+
+u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
+{
+	return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+			hw_sema->location.offset);
+}
+
+/*
+ * Read the underlying value from a semaphore.
+ */
+u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
+{
+	return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
+			s->location.offset);
+}
+
+/*
+ * Check if "racer" is over "goal" with wraparound handling.
+ */
+static bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
+{
+	/*
+	 * Handle wraparound with the same heuristic as the hardware does:
+	 * although the integer will eventually wrap around, consider a sema
+	 * released against a threshold if its value has passed that threshold
+	 * but has not wrapped over half of the u32 range over that threshold;
+	 * such wrapping is unlikely to happen during a sema lifetime.
+	 *
+	 * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
+	 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
+	 * then it needs 0x80000000 increments to wrap again and signal.
+	 *
+	 * Unsigned arithmetic is used because it's well-defined. This is
+	 * effectively the same as: signed_racer - signed_goal > 0.
+	 */
+
+	return racer - goal < 0x80000000;
+}
+
+u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
+{
+	return (u32)nvgpu_atomic_read(&s->value);
+}
+
+bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
+{
+	u32 sema_val = nvgpu_semaphore_read(s);
+	u32 wait_payload = nvgpu_semaphore_get_value(s);
+
+	return __nvgpu_semaphore_value_released(wait_payload, sema_val);
+}
+
+bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
+{
+	return !nvgpu_semaphore_is_released(s);
+}
+
+/*
+ * Fast-forward the hw sema to its tracked max value.
+ *
+ * Return true if the sema wasn't at the max value and needed updating, false
+ * otherwise.
+ */
+bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
+{
+	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
+	u32 current_val = __nvgpu_semaphore_read(hw_sema);
+
+	/*
+	 * If the semaphore has already reached the value we would write then
+	 * this is really just a NO-OP. However, the sema value shouldn't be
+	 * more than what we expect to be the max.
+	 */
+
+	if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
+			current_val)))
+		return false;
+
+	if (current_val == threshold)
+		return false;
+
+	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+			hw_sema->location.offset, threshold);
+
+	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
+			hw_sema->ch->chid, current_val, threshold);
+
+	return true;
+}
+
+/*
+ * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold
+ * value to "s" which represents the increment that the caller must write in a
+ * pushbuf. The same nvgpu_semaphore will also represent an output fence; when
+ * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
+ */
+void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
+		struct nvgpu_semaphore_int *hw_sema)
+{
+	int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
+
+	/* "s" should be an uninitialized sema. */
+	WARN_ON(s->incremented);
+
+	nvgpu_atomic_set(&s->value, next);
+	s->incremented = 1;
+
+	gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
+			hw_sema->ch->chid, next);
+}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 9ab6cc67..a7bd79cb 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -24,16 +24,13 @@
 #define SEMAPHORE_GK20A_H
 
 #include <nvgpu/log.h>
-#include <nvgpu/timers.h>
 #include <nvgpu/atomic.h>
-#include <nvgpu/bug.h>
 #include <nvgpu/kref.h>
 #include <nvgpu/list.h>
 #include <nvgpu/nvgpu_mem.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
-#include "gk20a/channel_gk20a.h"
 
 #define gpu_sema_dbg(g, fmt, args...) \
 	nvgpu_log(g, gpu_dbg_sema, fmt, ##args)
@@ -191,141 +188,18 @@ void nvgpu_semaphore_put(struct nvgpu_semaphore *s);
 void nvgpu_semaphore_get(struct nvgpu_semaphore *s);
 void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch);
 
-/*
- * Return the address of a specific semaphore.
- *
- * Don't call this on a semaphore you don't own - the VA returned will make no
- * sense in your specific channel's VM.
- */
-static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
-{
-	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
-		s->location.offset;
-}
-
-/*
- * Get the global RO address for the semaphore. Can be called on any semaphore
- * regardless of whether you own it.
- */
-static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
-{
-	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
-		s->location.offset;
-}
-
-static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
-{
-	return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
-		hw_sema->location.offset;
-}
-
-static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
-{
-	return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
-			hw_sema->location.offset);
-}
-
-/*
- * Read the underlying value from a semaphore.
- */
-static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
-{
-	return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
-			s->location.offset);
-}
-
-/*
- * Check if "racer" is over "goal" with wraparound handling.
- */
-static inline bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
-{
-	/*
-	 * Handle wraparound with the same heuristic as the hardware does:
-	 * although the integer will eventually wrap around, consider a sema
-	 * released against a threshold if its value has passed that threshold
-	 * but has not wrapped over half of the u32 range over that threshold;
-	 * such wrapping is unlikely to happen during a sema lifetime.
-	 *
-	 * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
-	 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
-	 * then it needs 0x80000000 increments to wrap again and signal.
-	 *
-	 * Unsigned arithmetic is used because it's well-defined. This is
-	 * effectively the same as: signed_racer - signed_goal > 0.
-	 */
-
-	return racer - goal < 0x80000000;
-}
-
-static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
-{
-	return (u32)nvgpu_atomic_read(&s->value);
-}
-
-static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
-{
-	u32 sema_val = nvgpu_semaphore_read(s);
-	u32 wait_payload = nvgpu_semaphore_get_value(s);
-
-	return __nvgpu_semaphore_value_released(wait_payload, sema_val);
-}
-
-static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
-{
-	return !nvgpu_semaphore_is_released(s);
-}
-
-/*
- * Fast-forward the hw sema to its tracked max value.
- *
- * Return true if the sema wasn't at the max value and needed updating, false
- * otherwise.
- */
-static inline bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
-{
-	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
-	u32 current_val = __nvgpu_semaphore_read(hw_sema);
-
-	/*
-	 * If the semaphore has already reached the value we would write then
-	 * this is really just a NO-OP. However, the sema value shouldn't be
-	 * more than what we expect to be the max.
-	 */
-
-	if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
-			current_val)))
-		return false;
-
-	if (current_val == threshold)
-		return false;
-
-	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
-			hw_sema->location.offset, threshold);
-
-	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
-			hw_sema->ch->chid, current_val, threshold);
-
-	return true;
-}
-
-/*
- * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold
- * value to "s" which represents the increment that the caller must write in a
- * pushbuf. The same nvgpu_semaphore will also represent an output fence; when
- * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
- */
-static inline void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
-		struct nvgpu_semaphore_int *hw_sema)
-{
-	int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
-
-	/* "s" should be an uninitialized sema. */
-	WARN_ON(s->incremented);
-
-	nvgpu_atomic_set(&s->value, next);
-	s->incremented = 1;
-
-	gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
-			hw_sema->ch->chid, next);
-}
+u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s);
+u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s);
+u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema);
+
+u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema);
+u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s);
+u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s);
+bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s);
+bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s);
+
+bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema);
+void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
+		struct nvgpu_semaphore_int *hw_sema);
+
 #endif