author    Jason Ekstrand <jason@jlekstrand.net>    2017-08-25 13:52:24 -0400
committer Dave Airlie <airlied@redhat.com>         2017-08-28 16:28:17 -0400
commit    e7aca5031a2fb51b6120864d0eff5478c95e6651
tree      bb653669e0250ca06978c5705e4023f3de05eccf
parent    1fc08218ed2a42c86af5c905fe4c00885376a07e
drm/syncobj: Allow wait for submit and signal behavior (v5)
Vulkan VkFence semantics require that the application be able to perform
a CPU wait on work which may not yet have been submitted. This is
perfectly safe because the CPU wait has a timeout which will get
triggered eventually if no work is ever submitted. This behavior is
advantageous for multi-threaded workloads because, so long as all of the
threads agree on what fences to use up-front, you don't have the extra
cross-thread synchronization cost of thread A telling thread B that it
has submitted its dependent work and thread B is now free to wait.

Within a single process, this can be implemented in the userspace driver
by doing exactly the same kind of tracking the app would have to do
using posix condition variables or similar. However, in order for this
to work cross-process (as is required by VK_KHR_external_fence), we need
to handle this in the kernel.

This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
instructs the IOCTL to wait for the syncobj to have a non-null fence and
then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
easily get the Vulkan behavior.

v2:
 - Fix a bug in the invalid syncobj error path
 - Unify the wait-all and wait-any cases

v3:
 - Unify the timeout == 0 case a bit with the timeout > 0 case
 - Use wait_event_interruptible_timeout

v4:
 - Use proxy fence

v5:
 - Revert to a combination of v2 and v3
 - Don't use proxy fences
 - Don't use wait_event_interruptible_timeout because it just adds an
   extra layer of callbacks

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Christian König <christian.koenig@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r--  drivers/gpu/drm/drm_syncobj.c  | 252
-rw-r--r--  include/uapi/drm/drm.h         |   1
2 files changed, 208 insertions(+), 45 deletions(-)
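
For orientation only (not part of the patch): a userspace caller would exercise the new flag roughly as in the sketch below. It is written against the uapi in include/uapi/drm/drm.h and libdrm's drmIoctl(); the helper name wait_for_submit_and_signal() and the single-handle setup are illustrative, the timeout is an absolute CLOCK_MONOTONIC value in nanoseconds, and errno handling is left to the caller.

    /* Sketch: CPU-wait on a syncobj whose fence may not have been submitted yet. */
    #include <stdint.h>
    #include <string.h>
    #include <xf86drm.h>   /* drmIoctl() */
    #include <drm.h>       /* struct drm_syncobj_wait, DRM_IOCTL_SYNCOBJ_WAIT */

    static int wait_for_submit_and_signal(int drm_fd, uint32_t handle,
                                          int64_t abs_timeout_ns)
    {
            struct drm_syncobj_wait wait;

            memset(&wait, 0, sizeof(wait));
            wait.handles = (uint64_t)(uintptr_t)&handle;  /* array of one handle */
            wait.count_handles = 1;
            wait.timeout_nsec = abs_timeout_ns;           /* absolute CLOCK_MONOTONIC */
            wait.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;

            /* Blocks until a fence is attached to the syncobj and that fence
             * signals, or until the timeout expires; drmIoctl() returns -1
             * with errno set on failure.
             */
            return drmIoctl(drm_fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
    }
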
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 12db8c9564cd..cccd3bd194c6 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -51,6 +51,7 @@
 #include <linux/fs.h>
 #include <linux/anon_inodes.h>
 #include <linux/sync_file.h>
+#include <linux/sched/signal.h>
 
 #include "drm_internal.h"
 #include <drm/drm_syncobj.h>
@@ -88,6 +89,35 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
         list_add_tail(&cb->node, &syncobj->cb_list);
 }
 
+static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
+                                                 struct dma_fence **fence,
+                                                 struct drm_syncobj_cb *cb,
+                                                 drm_syncobj_func_t func)
+{
+        int ret;
+
+        *fence = drm_syncobj_fence_get(syncobj);
+        if (*fence)
+                return 1;
+
+        spin_lock(&syncobj->lock);
+        /* We've already tried once to get a fence and failed. Now that we
+         * have the lock, try one more time just to be sure we don't add a
+         * callback when a fence has already been set.
+         */
+        if (syncobj->fence) {
+                *fence = dma_fence_get(syncobj->fence);
+                ret = 1;
+        } else {
+                *fence = NULL;
+                drm_syncobj_add_callback_locked(syncobj, cb, func);
+                ret = 0;
+        }
+        spin_unlock(&syncobj->lock);
+
+        return ret;
+}
+
 /**
  * drm_syncobj_add_callback - adds a callback to syncobj::cb_list
  * @syncobj: Sync object to which to add the callback
@@ -560,6 +590,160 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
                                   &args->handle);
 }
 
+struct syncobj_wait_entry {
+        struct task_struct *task;
+        struct dma_fence *fence;
+        struct dma_fence_cb fence_cb;
+        struct drm_syncobj_cb syncobj_cb;
+};
+
+static void syncobj_wait_fence_func(struct dma_fence *fence,
+                                    struct dma_fence_cb *cb)
+{
+        struct syncobj_wait_entry *wait =
+                container_of(cb, struct syncobj_wait_entry, fence_cb);
+
+        wake_up_process(wait->task);
+}
+
+static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
+                                      struct drm_syncobj_cb *cb)
+{
+        struct syncobj_wait_entry *wait =
+                container_of(cb, struct syncobj_wait_entry, syncobj_cb);
+
+        /* This happens inside the syncobj lock */
+        wait->fence = dma_fence_get(syncobj->fence);
+        wake_up_process(wait->task);
+}
+
+static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
+                                                  uint32_t count,
+                                                  uint32_t flags,
+                                                  signed long timeout,
+                                                  uint32_t *idx)
+{
+        struct syncobj_wait_entry *entries;
+        struct dma_fence *fence;
+        signed long ret;
+        uint32_t signaled_count, i;
+
+        entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
+        if (!entries)
+                return -ENOMEM;
+
+        /* Walk the list of sync objects and initialize entries. We do
+         * this up-front so that we can properly return -EINVAL if there is
+         * a syncobj with a missing fence and then never have the chance of
+         * returning -EINVAL again.
+         */
+        signaled_count = 0;
+        for (i = 0; i < count; ++i) {
+                entries[i].task = current;
+                entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
+                if (!entries[i].fence) {
+                        if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
+                                continue;
+                        } else {
+                                ret = -EINVAL;
+                                goto cleanup_entries;
+                        }
+                }
+
+                if (dma_fence_is_signaled(entries[i].fence)) {
+                        if (signaled_count == 0 && idx)
+                                *idx = i;
+                        signaled_count++;
+                }
+        }
+
+        /* Initialize ret to the max of timeout and 1. That way, the
+         * default return value indicates a successful wait and not a
+         * timeout.
+         */
+        ret = max_t(signed long, timeout, 1);
+
+        if (signaled_count == count ||
+            (signaled_count > 0 &&
+             !(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)))
+                goto cleanup_entries;
+
+        /* There's a very annoying laxness in the dma_fence API here, in
+         * that backends are not required to automatically report when a
+         * fence is signaled prior to fence->ops->enable_signaling() being
+         * called. So here if we fail to match signaled_count, we need to
+         * fall through and try a 0 timeout wait!
+         */
+
+        if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
+                for (i = 0; i < count; ++i) {
+                        drm_syncobj_fence_get_or_add_callback(syncobjs[i],
+                                                              &entries[i].fence,
+                                                              &entries[i].syncobj_cb,
+                                                              syncobj_wait_syncobj_func);
+                }
+        }
+
+        do {
+                set_current_state(TASK_INTERRUPTIBLE);
+
+                signaled_count = 0;
+                for (i = 0; i < count; ++i) {
+                        fence = entries[i].fence;
+                        if (!fence)
+                                continue;
+
+                        if (dma_fence_is_signaled(fence) ||
+                            (!entries[i].fence_cb.func &&
+                             dma_fence_add_callback(fence,
+                                                    &entries[i].fence_cb,
+                                                    syncobj_wait_fence_func))) {
+                                /* The fence has been signaled */
+                                if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
+                                        signaled_count++;
+                                } else {
+                                        if (idx)
+                                                *idx = i;
+                                        goto done_waiting;
+                                }
+                        }
+                }
+
+                if (signaled_count == count)
+                        goto done_waiting;
+
+                if (timeout == 0) {
+                        /* If we are doing a 0 timeout wait and we got
+                         * here, then we just timed out.
+                         */
+                        ret = 0;
+                        goto done_waiting;
+                }
+
+                ret = schedule_timeout(ret);
+
+                if (ret > 0 && signal_pending(current))
+                        ret = -ERESTARTSYS;
+        } while (ret > 0);
+
+done_waiting:
+        __set_current_state(TASK_RUNNING);
+
+cleanup_entries:
+        for (i = 0; i < count; ++i) {
+                if (entries[i].syncobj_cb.func)
+                        drm_syncobj_remove_callback(syncobjs[i],
+                                                    &entries[i].syncobj_cb);
+                if (entries[i].fence_cb.func)
+                        dma_fence_remove_callback(entries[i].fence,
+                                                  &entries[i].fence_cb);
+                dma_fence_put(entries[i].fence);
+        }
+        kfree(entries);
+
+        return ret;
+}
+
 /**
  * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value
  *
@@ -592,43 +776,19 @@ static signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec)
         return timeout_jiffies64 + 1;
 }
 
-static int drm_syncobj_wait_fences(struct drm_device *dev,
-                                   struct drm_file *file_private,
-                                   struct drm_syncobj_wait *wait,
-                                   struct dma_fence **fences)
+static int drm_syncobj_array_wait(struct drm_device *dev,
+                                  struct drm_file *file_private,
+                                  struct drm_syncobj_wait *wait,
+                                  struct drm_syncobj **syncobjs)
 {
         signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec);
         signed long ret = 0;
         uint32_t first = ~0;
 
-        if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
-                uint32_t i;
-                for (i = 0; i < wait->count_handles; i++) {
-                        ret = dma_fence_wait_timeout(fences[i], true, timeout);
-
-                        /* Various dma_fence wait callbacks will return
-                         * ENOENT to indicate that the fence has already
-                         * been signaled. We need to sanitize this to 0 so
-                         * we don't return early and the client doesn't see
-                         * an unexpected error.
-                         */
-                        if (ret == -ENOENT)
-                                ret = 0;
-
-                        if (ret < 0)
-                                return ret;
-                        if (ret == 0)
-                                break;
-                        timeout = ret;
-                }
-                first = 0;
-        } else {
-                ret = dma_fence_wait_any_timeout(fences,
-                                                 wait->count_handles,
-                                                 true, timeout,
-                                                 &first);
-        }
-
+        ret = drm_syncobj_array_wait_timeout(syncobjs,
+                                             wait->count_handles,
+                                             wait->flags,
+                                             timeout, &first);
         if (ret < 0)
                 return ret;
 
@@ -644,14 +804,15 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 {
         struct drm_syncobj_wait *args = data;
         uint32_t *handles;
-        struct dma_fence **fences;
+        struct drm_syncobj **syncobjs;
         int ret = 0;
         uint32_t i;
 
         if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
                 return -ENODEV;
 
-        if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
+        if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+                            DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
                 return -EINVAL;
 
         if (args->count_handles == 0)
@@ -670,27 +831,28 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
                 goto err_free_handles;
         }
 
-        fences = kcalloc(args->count_handles,
-                         sizeof(struct dma_fence *), GFP_KERNEL);
-        if (!fences) {
+        syncobjs = kcalloc(args->count_handles,
+                           sizeof(struct drm_syncobj *), GFP_KERNEL);
+        if (!syncobjs) {
                 ret = -ENOMEM;
                 goto err_free_handles;
         }
 
         for (i = 0; i < args->count_handles; i++) {
-                ret = drm_syncobj_find_fence(file_private, handles[i],
-                                             &fences[i]);
-                if (ret)
-                        goto err_free_fence_array;
+                syncobjs[i] = drm_syncobj_find(file_private, handles[i]);
+                if (!syncobjs[i]) {
+                        ret = -ENOENT;
+                        goto err_free_fence_array;
+                }
         }
 
-        ret = drm_syncobj_wait_fences(dev, file_private,
-                                      args, fences);
+        ret = drm_syncobj_array_wait(dev, file_private,
+                                     args, syncobjs);
 
 err_free_fence_array:
-        for (i = 0; i < args->count_handles; i++)
-                dma_fence_put(fences[i]);
-        kfree(fences);
+        while (i-- > 0)
+                drm_syncobj_put(syncobjs[i]);
+        kfree(syncobjs);
 err_free_handles:
         kfree(handles);
 
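
To connect the kernel paths above back to the commit message: the Vulkan fence behavior comes from pairing this wait with a reset. The sketch below is illustrative only; it assumes the DRM_IOCTL_SYNCOBJ_RESET ioctl and its struct drm_syncobj_array argument from the same uapi header, reuses the hypothetical wait_for_submit_and_signal() helper sketched after the commit message, and omits the submission thread that eventually attaches a fence to the syncobj.

    /* Sketch: VkFence-style flow -- reset, let another thread/process submit,
     * then wait with WAIT_FOR_SUBMIT even if submission has not happened yet.
     */
    #include <stdint.h>
    #include <string.h>
    #include <xf86drm.h>
    #include <drm.h>

    /* From the earlier sketch. */
    int wait_for_submit_and_signal(int drm_fd, uint32_t handle,
                                   int64_t abs_timeout_ns);

    static int vk_fence_style_wait(int drm_fd, uint32_t handle,
                                   int64_t abs_timeout_ns)
    {
            struct drm_syncobj_array reset;
            int ret;

            /* vkResetFences(): drop the current fence so the syncobj reads as
             * "no work submitted yet" (fence == NULL).
             */
            memset(&reset, 0, sizeof(reset));
            reset.handles = (uint64_t)(uintptr_t)&handle;
            reset.count_handles = 1;
            ret = drmIoctl(drm_fd, DRM_IOCTL_SYNCOBJ_RESET, &reset);
            if (ret)
                    return ret;

            /* ... work is submitted elsewhere, possibly in another process
             * sharing this syncobj via VK_KHR_external_fence ...
             */

            /* vkWaitForFences(): safe to call before submission thanks to
             * DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT.
             */
            return wait_for_submit_and_signal(drm_fd, handle, abs_timeout_ns);
    }
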
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index ade7f68d32b5..4c746597225e 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -720,6 +720,7 @@ struct drm_syncobj_handle {
 };
 
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
 struct drm_syncobj_wait {
         __u64 handles;
         /* absolute timeout */