summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/nvgpu/common/semaphore.c 139
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/semaphore.h 148
2 files changed, 150 insertions, 137 deletions
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index 25bd3be3..5814a737 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -28,6 +28,7 @@
28#include <nvgpu/kmem.h> 28#include <nvgpu/kmem.h>
29#include <nvgpu/bug.h> 29#include <nvgpu/bug.h>
30#include <nvgpu/sizes.h> 30#include <nvgpu/sizes.h>
31#include <nvgpu/channel.h>
31 32
32#include "gk20a/gk20a.h" 33#include "gk20a/gk20a.h"
33#include "gk20a/mm_gk20a.h" 34#include "gk20a/mm_gk20a.h"
@@ -485,3 +486,141 @@ void nvgpu_semaphore_get(struct nvgpu_semaphore *s)
485{ 486{
486 nvgpu_ref_get(&s->ref); 487 nvgpu_ref_get(&s->ref);
487} 488}
489
490/*
491 * Return the address of a specific semaphore.
492 *
493 * Don't call this on a semaphore you don't own - the VA returned will make no
494 * sense in your specific channel's VM.
495 */
496u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
497{
498 return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
499 s->location.offset;
500}
501
502/*
503 * Get the global RO address for the semaphore. Can be called on any semaphore
504 * regardless of whether you own it.
505 */
506u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
507{
508 return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
509 s->location.offset;
510}
511
512u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
513{
514 return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
515 hw_sema->location.offset;
516}
517
518u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
519{
520 return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
521 hw_sema->location.offset);
522}
523
524/*
525 * Read the underlying value from a semaphore.
526 */
527u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
528{
529 return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
530 s->location.offset);
531}
532
533/*
534 * Check if "racer" is over "goal" with wraparound handling.
535 */
536static bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
537{
538 /*
539 * Handle wraparound with the same heuristic as the hardware does:
540 * although the integer will eventually wrap around, consider a sema
541 * released against a threshold if its value has passed that threshold
542 * but has not wrapped over half of the u32 range over that threshold;
543 * such wrapping is unlikely to happen during a sema lifetime.
544 *
545 * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
546 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
547 * then it needs 0x80000000 increments to wrap again and signal.
548 *
549 * Unsigned arithmetic is used because it's well-defined. This is
550 * effectively the same as: signed_racer - signed_goal > 0.
551 */
552
553 return racer - goal < 0x80000000;
554}
555
556u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
557{
558 return (u32)nvgpu_atomic_read(&s->value);
559}
560
561bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
562{
563 u32 sema_val = nvgpu_semaphore_read(s);
564 u32 wait_payload = nvgpu_semaphore_get_value(s);
565
566 return __nvgpu_semaphore_value_released(wait_payload, sema_val);
567}
568
569bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
570{
571 return !nvgpu_semaphore_is_released(s);
572}
573
574/*
575 * Fast-forward the hw sema to its tracked max value.
576 *
577 * Return true if the sema wasn't at the max value and needed updating, false
578 * otherwise.
579 */
580bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
581{
582 u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
583 u32 current_val = __nvgpu_semaphore_read(hw_sema);
584
585 /*
586 * If the semaphore has already reached the value we would write then
587 * this is really just a NO-OP. However, the sema value shouldn't be
588 * more than what we expect to be the max.
589 */
590
591 if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
592 current_val)))
593 return false;
594
595 if (current_val == threshold)
596 return false;
597
598 nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
599 hw_sema->location.offset, threshold);
600
601 gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
602 hw_sema->ch->chid, current_val, threshold);
603
604 return true;
605}
606
607/*
608 * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold
609 * value to "s" which represents the increment that the caller must write in a
610 * pushbuf. The same nvgpu_semaphore will also represent an output fence; when
611 * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
612 */
613void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
614 struct nvgpu_semaphore_int *hw_sema)
615{
616 int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
617
618 /* "s" should be an uninitialized sema. */
619 WARN_ON(s->incremented);
620
621 nvgpu_atomic_set(&s->value, next);
622 s->incremented = 1;
623
624 gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
625 hw_sema->ch->chid, next);
626}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 9ab6cc67..a7bd79cb 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -24,16 +24,13 @@
24#define SEMAPHORE_GK20A_H 24#define SEMAPHORE_GK20A_H
25 25
26#include <nvgpu/log.h> 26#include <nvgpu/log.h>
27#include <nvgpu/timers.h>
28#include <nvgpu/atomic.h> 27#include <nvgpu/atomic.h>
29#include <nvgpu/bug.h>
30#include <nvgpu/kref.h> 28#include <nvgpu/kref.h>
31#include <nvgpu/list.h> 29#include <nvgpu/list.h>
32#include <nvgpu/nvgpu_mem.h> 30#include <nvgpu/nvgpu_mem.h>
33 31
34#include "gk20a/gk20a.h" 32#include "gk20a/gk20a.h"
35#include "gk20a/mm_gk20a.h" 33#include "gk20a/mm_gk20a.h"
36#include "gk20a/channel_gk20a.h"
37 34
38#define gpu_sema_dbg(g, fmt, args...) \ 35#define gpu_sema_dbg(g, fmt, args...) \
39 nvgpu_log(g, gpu_dbg_sema, fmt, ##args) 36 nvgpu_log(g, gpu_dbg_sema, fmt, ##args)
@@ -191,141 +188,18 @@ void nvgpu_semaphore_put(struct nvgpu_semaphore *s);
191void nvgpu_semaphore_get(struct nvgpu_semaphore *s); 188void nvgpu_semaphore_get(struct nvgpu_semaphore *s);
192void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch); 189void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch);
193 190
194/* 191u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s);
195 * Return the address of a specific semaphore. 192u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s);
196 * 193u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema);
197 * Don't call this on a semaphore you don't own - the VA returned will make no
198 * sense in your specific channel's VM.
199 */
200static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
201{
202 return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
203 s->location.offset;
204}
205
206/*
207 * Get the global RO address for the semaphore. Can be called on any semaphore
208 * regardless of whether you own it.
209 */
210static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
211{
212 return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
213 s->location.offset;
214}
215
216static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
217{
218 return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
219 hw_sema->location.offset;
220}
221
222static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
223{
224 return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
225 hw_sema->location.offset);
226}
227
228/*
229 * Read the underlying value from a semaphore.
230 */
231static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
232{
233 return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
234 s->location.offset);
235}
236
237/*
238 * Check if "racer" is over "goal" with wraparound handling.
239 */
240static inline bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
241{
242 /*
243 * Handle wraparound with the same heuristic as the hardware does:
244 * although the integer will eventually wrap around, consider a sema
245 * released against a threshold if its value has passed that threshold
246 * but has not wrapped over half of the u32 range over that threshold;
247 * such wrapping is unlikely to happen during a sema lifetime.
248 *
249 * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
250 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
251 * then it needs 0x80000000 increments to wrap again and signal.
252 *
253 * Unsigned arithmetic is used because it's well-defined. This is
254 * effectively the same as: signed_racer - signed_goal > 0.
255 */
256
257 return racer - goal < 0x80000000;
258}
259
260static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
261{
262 return (u32)nvgpu_atomic_read(&s->value);
263}
264
265static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
266{
267 u32 sema_val = nvgpu_semaphore_read(s);
268 u32 wait_payload = nvgpu_semaphore_get_value(s);
269
270 return __nvgpu_semaphore_value_released(wait_payload, sema_val);
271}
272
273static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
274{
275 return !nvgpu_semaphore_is_released(s);
276}
277
278/*
279 * Fast-forward the hw sema to its tracked max value.
280 *
281 * Return true if the sema wasn't at the max value and needed updating, false
282 * otherwise.
283 */
284static inline bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
285{
286 u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
287 u32 current_val = __nvgpu_semaphore_read(hw_sema);
288
289 /*
290 * If the semaphore has already reached the value we would write then
291 * this is really just a NO-OP. However, the sema value shouldn't be
292 * more than what we expect to be the max.
293 */
294
295 if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
296 current_val)))
297 return false;
298
299 if (current_val == threshold)
300 return false;
301
302 nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
303 hw_sema->location.offset, threshold);
304
305 gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
306 hw_sema->ch->chid, current_val, threshold);
307
308 return true;
309}
310
311/*
312 * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold
313 * value to "s" which represents the increment that the caller must write in a
314 * pushbuf. The same nvgpu_semaphore will also represent an output fence; when
315 * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
316 */
317static inline void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
318 struct nvgpu_semaphore_int *hw_sema)
319{
320 int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
321 194
322 /* "s" should be an uninitialized sema. */ 195u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema);
323 WARN_ON(s->incremented); 196u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s);
197u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s);
198bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s);
199bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s);
324 200
325 nvgpu_atomic_set(&s->value, next); 201bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema);
326 s->incremented = 1; 202void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
203 struct nvgpu_semaphore_int *hw_sema);
327 204
328 gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
329 hw_sema->ch->chid, next);
330}
331#endif 205#endif