Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c')
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 175 +++++++++++---------
 1 file changed, 82 insertions(+), 93 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 4b1be8b9..c6b55bf8 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -502,10 +502,10 @@ static void gk20a_channel_semaphore_launcher(
 
 static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
		  struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd,
-		  int cmd_size, bool acquire, bool wfi)
+		  u32 offset, bool acquire, bool wfi)
 {
	int ch = c->chid;
-	u32 ob, off = cmd->off;
+	u32 ob, off = cmd->off + offset;
	u64 va;
 
	ob = off;
@@ -588,108 +588,79 @@ static int gk20a_channel_semaphore_wait_syncpt(
 }
 
 #ifdef CONFIG_SYNC
-/*
- * Attempt a fast path for waiting on a sync_fence. Basically if the passed
- * sync_fence is backed by a nvgpu_semaphore then there's no reason to go
- * through the rigmarole of setting up a separate semaphore which waits on an
- * interrupt from the GPU and then triggers a worker thread to execute a SW
- * based semaphore release. Instead just have the GPU wait on the same semaphore
- * that is going to be incremented by the GPU.
- *
- * This function returns 2 possible values: -ENODEV or 0 on success. In the case
- * of -ENODEV the fastpath cannot be taken due to the fence not being backed by
- * a GPU semaphore.
- */
-static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
-					 struct sync_fence *fence,
-					 struct priv_cmd_entry *wait_cmd,
-					 struct nvgpu_semaphore **fp_sema)
+static int semaphore_wait_fd_native(struct channel_gk20a *c, int fd,
+		struct priv_cmd_entry *wait_cmd)
 {
-	struct nvgpu_semaphore *sema;
+	struct sync_fence *sync_fence;
	int err;
+	const int wait_cmd_size = 8;
+	int num_wait_cmds;
+	int i;
 
-	if (!gk20a_is_sema_backed_sync_fence(fence))
-		return -ENODEV;
-
-	sema = gk20a_sync_fence_get_sema(fence);
+	sync_fence = gk20a_sync_fence_fdget(fd);
+	if (!sync_fence)
+		return -EINVAL;
 
-	/*
-	 * If there's no underlying sema then that means the underlying sema has
-	 * already signaled.
-	 */
-	if (!sema) {
-		*fp_sema = NULL;
-		return 0;
-	}
+	num_wait_cmds = sync_fence->num_fences;
+	if (num_wait_cmds == 0) {
+		err = 0;
+		goto put_fence;
+	}
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
-	if (err)
-		return err;
+	err = gk20a_channel_alloc_priv_cmdbuf(c,
+			wait_cmd_size * num_wait_cmds,
+			wait_cmd);
+	if (err) {
+		nvgpu_err(c->g, "not enough priv cmd buffer space");
+		goto put_fence;
+	}
 
-	nvgpu_semaphore_get(sema);
-	BUG_ON(!sema->incremented);
-	add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false);
+	for (i = 0; i < sync_fence->num_fences; i++) {
+		struct fence *f = sync_fence->cbs[i].sync_pt;
+		struct sync_pt *pt = sync_pt_from_fence(f);
+		struct nvgpu_semaphore *sema;
 
-	/*
-	 * Make sure that gk20a_channel_semaphore_wait_fd() can create another
-	 * fence with the underlying semaphore.
-	 */
-	*fp_sema = sema;
+		sema = gk20a_sync_pt_sema(pt);
+		if (!sema) {
+			/* expired */
+			nvgpu_memset(c->g, wait_cmd->mem,
+				(wait_cmd->off + i * wait_cmd_size) * sizeof(u32),
+				0, wait_cmd_size * sizeof(u32));
+		} else {
+			WARN_ON(!sema->incremented);
+			add_sema_cmd(c->g, c, sema, wait_cmd,
+					i * wait_cmd_size, true, false);
+			nvgpu_semaphore_put(sema);
+		}
+	}
 
-	return 0;
+put_fence:
+	sync_fence_put(sync_fence);
+	return err;
 }
-#endif
 
-static int gk20a_channel_semaphore_wait_fd(
-		struct gk20a_channel_sync *s, int fd,
-		struct priv_cmd_entry *entry,
-		struct gk20a_fence *fence)
+static int semaphore_wait_fd_proxy(struct channel_gk20a *c, int fd,
+		struct priv_cmd_entry *wait_cmd,
+		struct gk20a_fence *fence_out,
+		struct sync_timeline *timeline)
 {
-	struct gk20a_channel_semaphore *sema =
-		container_of(s, struct gk20a_channel_semaphore, ops);
-	struct channel_gk20a *c = sema->c;
-#ifdef CONFIG_SYNC
-	struct nvgpu_semaphore *fp_sema;
+	const int wait_cmd_size = 8;
	struct sync_fence *sync_fence;
-	struct priv_cmd_entry *wait_cmd = entry;
	struct wait_fence_work *w = NULL;
-	int err, ret, status;
+	int err, status;
 
-	sync_fence = gk20a_sync_fence_fdget(fd);
+	sync_fence = sync_fence_fdget(fd);
	if (!sync_fence)
		return -EINVAL;
 
-	ret = __semaphore_wait_fd_fast_path(c, sync_fence, wait_cmd, &fp_sema);
-	if (ret == 0) {
-		if (fp_sema) {
-			err = gk20a_fence_from_semaphore(c->g, fence,
-					sema->timeline,
-					fp_sema,
-					&c->semaphore_wq,
-					false);
-			if (err) {
-				nvgpu_semaphore_put(fp_sema);
-				goto clean_up_priv_cmd;
-			}
-		} else
-			/*
-			 * Init an empty fence. It will instantly return
-			 * from gk20a_fence_wait().
-			 */
-			gk20a_init_fence(fence, NULL, NULL);
-
-		sync_fence_put(sync_fence);
-		goto skip_slow_path;
-	}
-
	/* If the fence has signaled there is no reason to wait on it. */
	status = atomic_read(&sync_fence->status);
	if (status == 0) {
		sync_fence_put(sync_fence);
-		goto skip_slow_path;
+		return 0;
	}
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
+	err = gk20a_channel_alloc_priv_cmdbuf(c, wait_cmd_size, wait_cmd);
	if (err) {
		nvgpu_err(c->g,
				"not enough priv cmd buffer space");
@@ -718,34 +689,34 @@ static int gk20a_channel_semaphore_wait_fd(
	nvgpu_semaphore_incr(w->sema, c->hw_sema);
 
	/* GPU unblocked when the semaphore value increments. */
-	add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
+	add_sema_cmd(c->g, c, w->sema, wait_cmd, 0, true, false);
 
	/*
	 * We need to create the fence before adding the waiter to ensure
	 * that we properly clean up in the event the sync_fence has
	 * already signaled
	 */
-	err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema,
-					 &c->semaphore_wq, false);
+	err = gk20a_fence_from_semaphore(c->g, fence_out, timeline,
+			w->sema, &c->semaphore_wq, false);
	if (err)
		goto clean_up_sema;
 
-	ret = sync_fence_wait_async(sync_fence, &w->waiter);
+	err = sync_fence_wait_async(sync_fence, &w->waiter);
	gk20a_add_pending_sema_wait(c->g, w);
 
	/*
-	 * If the sync_fence has already signaled then the above async_wait
-	 * will never trigger. This causes the semaphore release op to never
-	 * happen which, in turn, hangs the GPU. That's bad. So let's just
-	 * do the nvgpu_semaphore_release() right now.
+	 * If the sync_fence has already signaled then the above wait_async
+	 * will not get scheduled; the fence completed just after doing the
+	 * status check above before allocs and waiter init, and won the race.
+	 * This causes the waiter to be skipped, so let's release the semaphore
+	 * here and put the refs taken for the worker.
	 */
-	if (ret == 1) {
+	if (err == 1) {
		sync_fence_put(sync_fence);
		nvgpu_semaphore_release(w->sema, c->hw_sema);
		nvgpu_semaphore_put(w->sema);
	}
 
-skip_slow_path:
	return 0;
 
 clean_up_sema:
@@ -758,10 +729,28 @@ clean_up_sema:
 clean_up_worker:
	nvgpu_kfree(c->g, w);
 clean_up_priv_cmd:
-	gk20a_free_priv_cmdbuf(c, entry);
+	gk20a_free_priv_cmdbuf(c, wait_cmd);
 clean_up_sync_fence:
	sync_fence_put(sync_fence);
	return err;
+}
+#endif
+
+static int gk20a_channel_semaphore_wait_fd(
+		struct gk20a_channel_sync *s, int fd,
+		struct priv_cmd_entry *entry,
+		struct gk20a_fence *fence)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sema->c;
+#ifdef CONFIG_SYNC
+	int err;
+
+	err = semaphore_wait_fd_native(c, fd, entry);
+	if (err)
+		err = semaphore_wait_fd_proxy(c, fd, entry, fence, sema->timeline);
+	return err;
 #else
	nvgpu_err(c->g,
		"trying to use sync fds with CONFIG_SYNC disabled");
@@ -798,7 +787,7 @@ static int __gk20a_channel_semaphore_incr(
	}
 
	/* Release the completion semaphore. */
-	add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd);
+	add_sema_cmd(c->g, c, semaphore, incr_cmd, 0, false, wfi_cmd);
 
	err = gk20a_fence_from_semaphore(c->g, fence,
			sp->timeline, semaphore,
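Note on the native path above: instead of one combined wait, the new semaphore_wait_fd_native() reserves num_wait_cmds fixed-size slots of wait_cmd_size (8) words in a single priv cmdbuf, so the acquire command for fence i always starts at wait_cmd->off + i * wait_cmd_size, and an already-expired fence simply leaves its slot zero-filled, which the driver relies on being harmless padding. The following minimal, self-contained user-space sketch shows only that slot layout; fake_fence, emit_acquire and the emitted words are hypothetical stand-ins, not the nvgpu command format.

/*
 * Sketch of the fixed-slot layout used by semaphore_wait_fd_native().
 * All names and word encodings here are made up for illustration.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define WAIT_CMD_SIZE 8	/* words per semaphore-acquire slot */

struct fake_fence {
	int expired;		/* already signaled: leave a no-op slot */
	uint64_t sema_gpu_va;	/* GPU address of the semaphore */
	uint32_t payload;	/* value the GPU waits for */
};

/* Stand-in for add_sema_cmd(..., acquire == true): fill one slot. */
static void emit_acquire(uint32_t *buf, uint32_t off,
			 const struct fake_fence *f)
{
	buf[off + 0] = (uint32_t)(f->sema_gpu_va >> 32);	/* addr hi */
	buf[off + 1] = (uint32_t)f->sema_gpu_va;		/* addr lo */
	buf[off + 2] = f->payload;				/* wait value */
	/* words 3..7 stay zero: padding in this sketch */
}

int main(void)
{
	struct fake_fence fences[] = {
		{ .expired = 0, .sema_gpu_va = 0x100000, .payload = 7 },
		{ .expired = 1 },	/* mirrors the "expired" branch */
		{ .expired = 0, .sema_gpu_va = 0x200000, .payload = 9 },
	};
	int num_wait_cmds = (int)(sizeof(fences) / sizeof(fences[0]));
	/* one allocation sized for every fence, as in the native path */
	uint32_t *cmds = calloc((size_t)num_wait_cmds * WAIT_CMD_SIZE,
				sizeof(*cmds));
	int i;

	if (!cmds)
		return 1;

	for (i = 0; i < num_wait_cmds; i++) {
		uint32_t off = (uint32_t)i * WAIT_CMD_SIZE;

		if (fences[i].expired)
			/* analogous to the nvgpu_memset() no-op fill */
			memset(&cmds[off], 0, WAIT_CMD_SIZE * sizeof(*cmds));
		else
			emit_acquire(cmds, off, &fences[i]);
	}

	printf("slot 1 word 0 = %#x (no-op slot for expired fence)\n",
	       (unsigned)cmds[WAIT_CMD_SIZE]);
	free(cmds);
	return 0;
}

The fixed slot size is what motivates changing add_sema_cmd() to take a word offset instead of a command size in the first hunk: callers can now address slot i of a multi-command buffer directly.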
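The proxy path keeps one subtle race, described in the rewritten comment in the third hunk: sync_fence_wait_async() can report that the fence signaled between the status check and the waiter registration (return value 1), and then the release the waiter would have done must happen inline, or the GPU-side acquire never completes. A toy illustration of that contract, again with invented names:

/*
 * Sketch of the race handled at the end of semaphore_wait_fd_proxy():
 * registering a waiter on an already-signaled fence returns 1, and the
 * caller must perform the release itself since the callback never fires.
 */
#include <stdatomic.h>
#include <stdio.h>

struct toy_fence {
	atomic_int signaled;	/* 0 = pending, 1 = signaled */
	void (*cb)(void *);	/* registered waiter callback */
	void *data;
};

/* Returns 0 if the callback was registered, 1 if already signaled. */
static int toy_wait_async(struct toy_fence *f, void (*cb)(void *), void *data)
{
	if (atomic_load(&f->signaled))
		return 1;	/* waiter skipped; caller must release */
	f->cb = cb;
	f->data = data;
	return 0;
}

static void release_sema(void *data)
{
	printf("releasing GPU semaphore for %s\n", (const char *)data);
}

int main(void)
{
	struct toy_fence f = { .signaled = 1 };	/* lost the race */
	int err = toy_wait_async(&f, release_sema, "job 42");

	if (err == 1)		/* mirrors the "if (err == 1)" branch */
		release_sema("job 42");
	return 0;
}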