diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/semaphore.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/semaphore.c | 139 |
1 files changed, 139 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c index 25bd3be3..5814a737 100644 --- a/drivers/gpu/nvgpu/common/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <nvgpu/kmem.h> | 28 | #include <nvgpu/kmem.h> |
29 | #include <nvgpu/bug.h> | 29 | #include <nvgpu/bug.h> |
30 | #include <nvgpu/sizes.h> | 30 | #include <nvgpu/sizes.h> |
31 | #include <nvgpu/channel.h> | ||
31 | 32 | ||
32 | #include "gk20a/gk20a.h" | 33 | #include "gk20a/gk20a.h" |
33 | #include "gk20a/mm_gk20a.h" | 34 | #include "gk20a/mm_gk20a.h" |
@@ -485,3 +486,141 @@ void nvgpu_semaphore_get(struct nvgpu_semaphore *s) | |||
485 | { | 486 | { |
486 | nvgpu_ref_get(&s->ref); | 487 | nvgpu_ref_get(&s->ref); |
487 | } | 488 | } |
489 | |||
490 | /* | ||
491 | * Return the address of a specific semaphore. | ||
492 | * | ||
493 | * Don't call this on a semaphore you don't own - the VA returned will make no | ||
494 | * sense in your specific channel's VM. | ||
495 | */ | ||
496 | u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s) | ||
497 | { | ||
498 | return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) + | ||
499 | s->location.offset; | ||
500 | } | ||
501 | |||
502 | /* | ||
503 | * Get the global RO address for the semaphore. Can be called on any semaphore | ||
504 | * regardless of whether you own it. | ||
505 | */ | ||
506 | u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s) | ||
507 | { | ||
508 | return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) + | ||
509 | s->location.offset; | ||
510 | } | ||
511 | |||
512 | u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema) | ||
513 | { | ||
514 | return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) + | ||
515 | hw_sema->location.offset; | ||
516 | } | ||
517 | |||
518 | u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema) | ||
519 | { | ||
520 | return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, | ||
521 | hw_sema->location.offset); | ||
522 | } | ||
523 | |||
524 | /* | ||
525 | * Read the underlying value from a semaphore. | ||
526 | */ | ||
527 | u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) | ||
528 | { | ||
529 | return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem, | ||
530 | s->location.offset); | ||
531 | } | ||
532 | |||
/*
 * Check if "racer" has reached or passed "goal", with wraparound handling.
 *
 * Returns true when racer lies in [goal, goal + 0x7fffffff] modulo 2^32
 * (note: racer == goal counts as released), false otherwise.
 */
static bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
{
	/*
	 * Handle wraparound with the same heuristic as the hardware does:
	 * although the integer will eventually wrap around, consider a sema
	 * released against a threshold if its value has reached or passed
	 * that threshold but has not run more than half of the u32 range
	 * ahead of it; such wrapping is unlikely to happen during a sema
	 * lifetime.
	 *
	 * Values in [goal, goal + 0x7fffffff] are considered signaled; that's
	 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
	 * then it needs 0x80000000 more increments to wrap again and signal.
	 *
	 * Unsigned arithmetic is used because its wraparound is well-defined.
	 * The test is effectively the same as (s32)(racer - goal) >= 0, i.e.
	 * the signed distance from goal to racer is non-negative.
	 */
	return (racer - goal) < 0x80000000U;
}
555 | |||
556 | u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) | ||
557 | { | ||
558 | return (u32)nvgpu_atomic_read(&s->value); | ||
559 | } | ||
560 | |||
561 | bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) | ||
562 | { | ||
563 | u32 sema_val = nvgpu_semaphore_read(s); | ||
564 | u32 wait_payload = nvgpu_semaphore_get_value(s); | ||
565 | |||
566 | return __nvgpu_semaphore_value_released(wait_payload, sema_val); | ||
567 | } | ||
568 | |||
569 | bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) | ||
570 | { | ||
571 | return !nvgpu_semaphore_is_released(s); | ||
572 | } | ||
573 | |||
574 | /* | ||
575 | * Fast-forward the hw sema to its tracked max value. | ||
576 | * | ||
577 | * Return true if the sema wasn't at the max value and needed updating, false | ||
578 | * otherwise. | ||
579 | */ | ||
580 | bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema) | ||
581 | { | ||
582 | u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value); | ||
583 | u32 current_val = __nvgpu_semaphore_read(hw_sema); | ||
584 | |||
585 | /* | ||
586 | * If the semaphore has already reached the value we would write then | ||
587 | * this is really just a NO-OP. However, the sema value shouldn't be | ||
588 | * more than what we expect to be the max. | ||
589 | */ | ||
590 | |||
591 | if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1, | ||
592 | current_val))) | ||
593 | return false; | ||
594 | |||
595 | if (current_val == threshold) | ||
596 | return false; | ||
597 | |||
598 | nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, | ||
599 | hw_sema->location.offset, threshold); | ||
600 | |||
601 | gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u", | ||
602 | hw_sema->ch->chid, current_val, threshold); | ||
603 | |||
604 | return true; | ||
605 | } | ||
606 | |||
607 | /* | ||
608 | * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold | ||
609 | * value to "s" which represents the increment that the caller must write in a | ||
610 | * pushbuf. The same nvgpu_semaphore will also represent an output fence; when | ||
611 | * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment. | ||
612 | */ | ||
613 | void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, | ||
614 | struct nvgpu_semaphore_int *hw_sema) | ||
615 | { | ||
616 | int next = nvgpu_atomic_add_return(1, &hw_sema->next_value); | ||
617 | |||
618 | /* "s" should be an uninitialized sema. */ | ||
619 | WARN_ON(s->incremented); | ||
620 | |||
621 | nvgpu_atomic_set(&s->value, next); | ||
622 | s->incremented = 1; | ||
623 | |||
624 | gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)", | ||
625 | hw_sema->ch->chid, next); | ||
626 | } | ||