Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  175
1 file changed, 82 insertions, 93 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 4b1be8b9..c6b55bf8 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -502,10 +502,10 @@ static void gk20a_channel_semaphore_launcher(
 
 static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
 		  struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd,
-		  int cmd_size, bool acquire, bool wfi)
+		  u32 offset, bool acquire, bool wfi)
 {
 	int ch = c->chid;
-	u32 ob, off = cmd->off;
+	u32 ob, off = cmd->off + offset;
 	u64 va;
 
 	ob = off;
@@ -588,108 +588,79 @@ static int gk20a_channel_semaphore_wait_syncpt(
 }
 
 #ifdef CONFIG_SYNC
-/*
- * Attempt a fast path for waiting on a sync_fence. Basically if the passed
- * sync_fence is backed by a nvgpu_semaphore then there's no reason to go
- * through the rigmarole of setting up a separate semaphore which waits on an
- * interrupt from the GPU and then triggers a worker thread to execute a SW
- * based semaphore release. Instead just have the GPU wait on the same semaphore
- * that is going to be incremented by the GPU.
- *
- * This function returns 2 possible values: -ENODEV or 0 on success. In the case
- * of -ENODEV the fastpath cannot be taken due to the fence not being backed by
- * a GPU semaphore.
- */
-static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
-			       struct sync_fence *fence,
-			       struct priv_cmd_entry *wait_cmd,
-			       struct nvgpu_semaphore **fp_sema)
+static int semaphore_wait_fd_native(struct channel_gk20a *c, int fd,
+		struct priv_cmd_entry *wait_cmd)
 {
-	struct nvgpu_semaphore *sema;
+	struct sync_fence *sync_fence;
 	int err;
+	const int wait_cmd_size = 8;
+	int num_wait_cmds;
+	int i;
 
-	if (!gk20a_is_sema_backed_sync_fence(fence))
-		return -ENODEV;
-
-	sema = gk20a_sync_fence_get_sema(fence);
+	sync_fence = gk20a_sync_fence_fdget(fd);
+	if (!sync_fence)
+		return -EINVAL;
 
-	/*
-	 * If there's no underlying sema then that means the underlying sema has
-	 * already signaled.
-	 */
-	if (!sema) {
-		*fp_sema = NULL;
-		return 0;
+	num_wait_cmds = sync_fence->num_fences;
+	if (num_wait_cmds == 0) {
+		err = 0;
+		goto put_fence;
 	}
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
-	if (err)
-		return err;
+	err = gk20a_channel_alloc_priv_cmdbuf(c,
+			wait_cmd_size * num_wait_cmds,
+			wait_cmd);
+	if (err) {
+		nvgpu_err(c->g, "not enough priv cmd buffer space");
+		goto put_fence;
+	}
 
-	nvgpu_semaphore_get(sema);
-	BUG_ON(!sema->incremented);
-	add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false);
+	for (i = 0; i < sync_fence->num_fences; i++) {
+		struct fence *f = sync_fence->cbs[i].sync_pt;
+		struct sync_pt *pt = sync_pt_from_fence(f);
+		struct nvgpu_semaphore *sema;
 
-	/*
-	 * Make sure that gk20a_channel_semaphore_wait_fd() can create another
-	 * fence with the underlying semaphore.
-	 */
-	*fp_sema = sema;
+		sema = gk20a_sync_pt_sema(pt);
+		if (!sema) {
+			/* expired */
+			nvgpu_memset(c->g, wait_cmd->mem,
+				(wait_cmd->off + i * wait_cmd_size) * sizeof(u32),
+				0, wait_cmd_size * sizeof(u32));
+		} else {
+			WARN_ON(!sema->incremented);
+			add_sema_cmd(c->g, c, sema, wait_cmd,
+				i * wait_cmd_size, true, false);
+			nvgpu_semaphore_put(sema);
+		}
+	}
 
-	return 0;
+put_fence:
+	sync_fence_put(sync_fence);
+	return err;
 }
-#endif
 
-static int gk20a_channel_semaphore_wait_fd(
-		struct gk20a_channel_sync *s, int fd,
-		struct priv_cmd_entry *entry,
-		struct gk20a_fence *fence)
+static int semaphore_wait_fd_proxy(struct channel_gk20a *c, int fd,
+		struct priv_cmd_entry *wait_cmd,
+		struct gk20a_fence *fence_out,
+		struct sync_timeline *timeline)
 {
-	struct gk20a_channel_semaphore *sema =
-		container_of(s, struct gk20a_channel_semaphore, ops);
-	struct channel_gk20a *c = sema->c;
-#ifdef CONFIG_SYNC
-	struct nvgpu_semaphore *fp_sema;
+	const int wait_cmd_size = 8;
 	struct sync_fence *sync_fence;
-	struct priv_cmd_entry *wait_cmd = entry;
 	struct wait_fence_work *w = NULL;
-	int err, ret, status;
+	int err, status;
 
-	sync_fence = gk20a_sync_fence_fdget(fd);
+	sync_fence = sync_fence_fdget(fd);
 	if (!sync_fence)
 		return -EINVAL;
 
-	ret = __semaphore_wait_fd_fast_path(c, sync_fence, wait_cmd, &fp_sema);
-	if (ret == 0) {
-		if (fp_sema) {
-			err = gk20a_fence_from_semaphore(c->g, fence,
-					sema->timeline,
-					fp_sema,
-					&c->semaphore_wq,
-					false);
-			if (err) {
-				nvgpu_semaphore_put(fp_sema);
-				goto clean_up_priv_cmd;
-			}
-		} else
-			/*
-			 * Init an empty fence. It will instantly return
-			 * from gk20a_fence_wait().
-			 */
-			gk20a_init_fence(fence, NULL, NULL);
-
-		sync_fence_put(sync_fence);
-		goto skip_slow_path;
-	}
-
 	/* If the fence has signaled there is no reason to wait on it. */
 	status = atomic_read(&sync_fence->status);
 	if (status == 0) {
 		sync_fence_put(sync_fence);
-		goto skip_slow_path;
+		return 0;
 	}
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
+	err = gk20a_channel_alloc_priv_cmdbuf(c, wait_cmd_size, wait_cmd);
 	if (err) {
 		nvgpu_err(c->g,
 				"not enough priv cmd buffer space");
@@ -718,34 +689,34 @@ static int gk20a_channel_semaphore_wait_fd(
 	nvgpu_semaphore_incr(w->sema, c->hw_sema);
 
 	/* GPU unblocked when the semaphore value increments. */
-	add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
+	add_sema_cmd(c->g, c, w->sema, wait_cmd, 0, true, false);
 
 	/*
 	 * We need to create the fence before adding the waiter to ensure
 	 * that we properly clean up in the event the sync_fence has
 	 * already signaled
 	 */
-	err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema,
-			&c->semaphore_wq, false);
+	err = gk20a_fence_from_semaphore(c->g, fence_out, timeline,
+			w->sema, &c->semaphore_wq, false);
 	if (err)
 		goto clean_up_sema;
 
-	ret = sync_fence_wait_async(sync_fence, &w->waiter);
+	err = sync_fence_wait_async(sync_fence, &w->waiter);
 	gk20a_add_pending_sema_wait(c->g, w);
 
 	/*
-	 * If the sync_fence has already signaled then the above async_wait
-	 * will never trigger. This causes the semaphore release op to never
-	 * happen which, in turn, hangs the GPU. That's bad. So let's just
-	 * do the nvgpu_semaphore_release() right now.
+	 * If the sync_fence has already signaled then the above wait_async
+	 * will not get scheduled; the fence completed just after doing the
+	 * status check above before allocs and waiter init, and won the race.
+	 * This causes the waiter to be skipped, so let's release the semaphore
+	 * here and put the refs taken for the worker.
 	 */
-	if (ret == 1) {
+	if (err == 1) {
 		sync_fence_put(sync_fence);
 		nvgpu_semaphore_release(w->sema, c->hw_sema);
 		nvgpu_semaphore_put(w->sema);
 	}
 
-skip_slow_path:
 	return 0;
 
 clean_up_sema:
@@ -758,10 +729,28 @@ clean_up_sema:
 clean_up_worker:
 	nvgpu_kfree(c->g, w);
 clean_up_priv_cmd:
-	gk20a_free_priv_cmdbuf(c, entry);
+	gk20a_free_priv_cmdbuf(c, wait_cmd);
 clean_up_sync_fence:
 	sync_fence_put(sync_fence);
 	return err;
+}
+#endif
+
+static int gk20a_channel_semaphore_wait_fd(
+		struct gk20a_channel_sync *s, int fd,
+		struct priv_cmd_entry *entry,
+		struct gk20a_fence *fence)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sema->c;
+#ifdef CONFIG_SYNC
+	int err;
+
+	err = semaphore_wait_fd_native(c, fd, entry);
+	if (err)
+		err = semaphore_wait_fd_proxy(c, fd, entry, fence, sema->timeline);
+	return err;
 #else
 	nvgpu_err(c->g,
 			"trying to use sync fds with CONFIG_SYNC disabled");
@@ -798,7 +787,7 @@ static int __gk20a_channel_semaphore_incr(
 	}
 
 	/* Release the completion semaphore. */
-	add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd);
+	add_sema_cmd(c->g, c, semaphore, incr_cmd, 0, false, wfi_cmd);
 
 	err = gk20a_fence_from_semaphore(c->g, fence,
 				sp->timeline, semaphore,
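
Note on the new add_sema_cmd() signature: the hunks above replace the unused command-size argument with a word offset, so that semaphore_wait_fd_native() can pack one 8-word acquire command per underlying fence into a single priv cmd buffer allocation, zeroing the slot of any fence that has already expired. The standalone C sketch below only illustrates that offset arithmetic; the names used here (WAIT_CMD_SIZE, mem, wait_slot_byte_off, clear_expired_slot) are illustrative stand-ins, not nvgpu APIs.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define WAIT_CMD_SIZE 8u	/* words per semaphore acquire command */

/*
 * Illustrative stand-in for the priv cmd buffer: "mem" plays the role of
 * priv_cmd_entry::mem, and "off" below plays the role of priv_cmd_entry::off
 * (the word offset at which this allocation starts inside the shared buffer).
 */
static uint32_t mem[1024];

/* Byte offset of the i-th packed wait command, matching the
 * (wait_cmd->off + i * wait_cmd_size) * sizeof(u32) expression in the diff. */
static size_t wait_slot_byte_off(uint32_t off, uint32_t i)
{
	return (size_t)(off + i * WAIT_CMD_SIZE) * sizeof(uint32_t);
}

/* An expired fence gets its 8-word slot zeroed instead of an acquire command,
 * mirroring the nvgpu_memset() branch in semaphore_wait_fd_native(). */
static void clear_expired_slot(uint32_t off, uint32_t i)
{
	memset((uint8_t *)mem + wait_slot_byte_off(off, i), 0,
	       WAIT_CMD_SIZE * sizeof(uint32_t));
}

int main(void)
{
	uint32_t off = 16;	/* pretend the allocation starts at word 16 */

	clear_expired_slot(off, 2);
	printf("fence 2 occupies bytes %zu..%zu\n",
	       wait_slot_byte_off(off, 2),
	       wait_slot_byte_off(off, 3) - 1);
	return 0;
}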