author		Chris Wilson <chris@chris-wilson.co.uk>	2011-03-06 08:51:29 -0500
committer	Chris Wilson <chris@chris-wilson.co.uk>	2011-03-07 06:06:02 -0500
commit		c59a333f73868ca6fbcecea99b3542e2c62a3a5c
tree		7c858d5ea0f03b6fb8b7ed7169319dfa4d1797af /drivers/gpu
parent		3d3dc149eda48566619d165f6b34e5eeca00edf1
drm/i915: Only wait on a pending flip if we intend to write to the buffer
... as if we are only reading from the buffer, we can do so concurrently
with the queued flip.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
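
The behavioural change condenses to a small standalone sketch; struct buf
and collect_flips() below are illustrative stand-ins for
drm_i915_gem_object and the accumulation step the patch adds to
i915_gem_object_set_to_gpu_domain(), not the driver's real API. Flip waits
are gathered only for buffers the batch will write, so a batch that merely
samples a scanout buffer no longer stalls behind its pending flip.

    #include <stdio.h>

    /* Hypothetical stand-in for drm_i915_gem_object: one bit per display
     * plane in pending_flip; pending_write_domain is non-zero when the
     * batch will write the buffer. */
    struct buf {
            unsigned int pending_write_domain;
            unsigned int pending_flip;
    };

    /* Only writers contribute their pending flips to the mask the ring
     * must wait on; readers are skipped entirely. */
    static unsigned int collect_flips(const struct buf *bufs, int n)
    {
            unsigned int flips = 0;
            int i;

            for (i = 0; i < n; i++)
                    if (bufs[i].pending_write_domain)
                            flips |= bufs[i].pending_flip;
            return flips;
    }

    int main(void)
    {
            struct buf reader = { .pending_write_domain = 0,    .pending_flip = 0x1 };
            struct buf writer = { .pending_write_domain = 0x40, .pending_flip = 0x2 };
            struct buf set[] = { reader, writer };

            /* Only the writer's plane bit survives: prints "flips = 0x2". */
            printf("flips = 0x%x\n", collect_flips(set, 2));
            return 0;
    }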
Diffstat (limited to 'drivers/gpu')
-rw-r--r--	drivers/gpu/drm/i915/i915_gem_execbuffer.c | 92
1 file changed, 44 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d461ad5f9290..8513c04dc892 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -37,6 +37,7 @@ struct change_domains {
 	uint32_t invalidate_domains;
 	uint32_t flush_domains;
 	uint32_t flush_rings;
+	uint32_t flips;
 };
 
 /*
@@ -190,6 +191,9 @@ i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
 	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT)
 		i915_gem_release_mmap(obj);
 
+	if (obj->base.pending_write_domain)
+		cd->flips |= atomic_read(&obj->pending_flip);
+
 	/* The actual obj->write_domain will be updated with
 	 * pending_write_domain after we emit the accumulated flush for all
 	 * of our domain changes in execbuffers (which clears objects'
@@ -774,6 +778,39 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
 }
 
 static int
+i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
+{
+	u32 plane, flip_mask;
+	int ret;
+
+	/* Check for any pending flips. As we only maintain a flip queue depth
+	 * of 1, we can simply insert a WAIT for the next display flip prior
+	 * to executing the batch and avoid stalling the CPU.
+	 */
+
+	for (plane = 0; flips >> plane; plane++) {
+		if (((flips >> plane) & 1) == 0)
+			continue;
+
+		if (plane)
+			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
+		else
+			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
+
+		ret = intel_ring_begin(ring, 2);
+		if (ret)
+			return ret;
+
+		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
+
+	return 0;
+}
+
+
+static int
 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 				struct list_head *objects)
 {
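
The loop condition in the new helper deserves a note: `flips >> plane`
terminates the scan as soon as no set bits remain at or above the current
plane, so only planes with a queued flip cost ring space (a two-dword
MI_WAIT_FOR_EVENT + MI_NOOP pair each). A plain-C sketch of the same bit
decode, using a made-up mask value rather than real driver state:

    #include <stdio.h>

    int main(void)
    {
            unsigned int flips = 0x3;	/* planes 0 and 1 have queued flips */
            unsigned int plane;

            for (plane = 0; flips >> plane; plane++) {
                    if (((flips >> plane) & 1) == 0)
                            continue;
                    /* In the driver this is where the 2-dword
                     * MI_WAIT_FOR_EVENT + MI_NOOP pair is emitted. */
                    printf("wait on plane %u\n", plane);
            }
            return 0;
    }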
@@ -781,9 +818,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 	struct change_domains cd;
 	int ret;
 
-	cd.invalidate_domains = 0;
-	cd.flush_domains = 0;
-	cd.flush_rings = 0;
+	memset(&cd, 0, sizeof(cd));
 	list_for_each_entry(obj, objects, exec_list)
 		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
 
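
A design note on the memset() change above: zero-filling the whole struct
guarantees that members added later, such as the new flips field, start
out cleared at every call site with no extra assignment. A purely
illustrative userspace equivalent (the _sketch type is hypothetical) uses
an empty-braces initialiser for the same effect:

    struct change_domains_sketch {
            unsigned int invalidate_domains;
            unsigned int flush_domains;
            unsigned int flush_rings;
            unsigned int flips;	/* added by this patch */
    };

    int main(void)
    {
            /* { 0 } zero-initialises every member, present and future,
             * just as the memset() does in the kernel code. */
            struct change_domains_sketch cd = { 0 };
            return (int)cd.flips;	/* always 0 */
    }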
@@ -796,6 +831,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 		return ret;
 	}
 
+	if (cd.flips) {
+		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
+		if (ret)
+			return ret;
+	}
+
 	list_for_each_entry(obj, objects, exec_list) {
 		ret = i915_gem_execbuffer_sync_rings(obj, ring);
 		if (ret)
@@ -842,47 +883,6 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 	return 0;
 }
 
-static int
-i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring,
-				   struct list_head *objects)
-{
-	struct drm_i915_gem_object *obj;
-	int flips;
-
-	/* Check for any pending flips. As we only maintain a flip queue depth
-	 * of 1, we can simply insert a WAIT for the next display flip prior
-	 * to executing the batch and avoid stalling the CPU.
-	 */
-	flips = 0;
-	list_for_each_entry(obj, objects, exec_list) {
-		if (obj->base.write_domain)
-			flips |= atomic_read(&obj->pending_flip);
-	}
-	if (flips) {
-		int plane, flip_mask, ret;
-
-		for (plane = 0; flips >> plane; plane++) {
-			if (((flips >> plane) & 1) == 0)
-				continue;
-
-			if (plane)
-				flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
-			else
-				flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
-
-			ret = intel_ring_begin(ring, 2);
-			if (ret)
-				return ret;
-
-			intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
-			intel_ring_emit(ring, MI_NOOP);
-			intel_ring_advance(ring);
-		}
-	}
-
-	return 0;
-}
-
 static void
 i915_gem_execbuffer_move_to_active(struct list_head *objects,
 				   struct intel_ring_buffer *ring,
@@ -1133,10 +1133,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (ret)
 		goto err;
 
-	ret = i915_gem_execbuffer_wait_for_flips(ring, &objects);
-	if (ret)
-		goto err;
-
 	seqno = i915_gem_next_request_seqno(ring);
 	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
 		if (seqno < ring->sync_seqno[i]) {