author	Chris Wilson <chris@chris-wilson.co.uk>	2011-03-06 08:51:29 -0500
committer	Chris Wilson <chris@chris-wilson.co.uk>	2011-03-07 06:06:02 -0500
commit	c59a333f73868ca6fbcecea99b3542e2c62a3a5c (patch)
tree	7c858d5ea0f03b6fb8b7ed7169319dfa4d1797af /drivers/gpu
parent	3d3dc149eda48566619d165f6b34e5eeca00edf1 (diff)
drm/i915: Only wait on a pending flip if we intend to write to the buffer
... as if we are only reading from it, we can do that concurrently with the queue flip.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--	drivers/gpu/drm/i915/i915_gem_execbuffer.c	| 92
1 file changed, 44 insertions(+), 48 deletions(-)
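
Before the diff itself, a minimal userspace sketch of the per-plane walk that the patch's new i915_gem_execbuffer_wait_for_flips() performs over the pending-flip bitmask. This is an illustration only, not kernel code and not part of the patch: the printf stands in for the MI_WAIT_FOR_EVENT commands that the real function emits onto the ring.

#include <stdint.h>
#include <stdio.h>

/* Illustration only: scan a pending-flip bitmask the same way the patch
 * does, issuing one wait per plane whose bit is set (bit 0 = plane A,
 * bit 1 = plane B). The printf stands in for intel_ring_emit(). */
static void emit_flip_waits(uint32_t flips)
{
	uint32_t plane;

	for (plane = 0; flips >> plane; plane++) {
		if (((flips >> plane) & 1) == 0)
			continue;

		printf("WAIT for plane %c flip\n", plane ? 'B' : 'A');
	}
}

int main(void)
{
	emit_flip_waits(0x2);	/* e.g. a flip pending only on plane B */
	return 0;
}
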
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d461ad5f9290..8513c04dc892 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -37,6 +37,7 @@ struct change_domains {
 	uint32_t invalidate_domains;
 	uint32_t flush_domains;
 	uint32_t flush_rings;
+	uint32_t flips;
 };
 
 /*
@@ -190,6 +191,9 @@ i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
 	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT)
 		i915_gem_release_mmap(obj);
 
+	if (obj->base.pending_write_domain)
+		cd->flips |= atomic_read(&obj->pending_flip);
+
 	/* The actual obj->write_domain will be updated with
 	 * pending_write_domain after we emit the accumulated flush for all
 	 * of our domain changes in execbuffers (which clears objects'
@@ -774,6 +778,39 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
 }
 
 static int
+i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
+{
+	u32 plane, flip_mask;
+	int ret;
+
+	/* Check for any pending flips. As we only maintain a flip queue depth
+	 * of 1, we can simply insert a WAIT for the next display flip prior
+	 * to executing the batch and avoid stalling the CPU.
+	 */
+
+	for (plane = 0; flips >> plane; plane++) {
+		if (((flips >> plane) & 1) == 0)
+			continue;
+
+		if (plane)
+			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
+		else
+			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
+
+		ret = intel_ring_begin(ring, 2);
+		if (ret)
+			return ret;
+
+		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
+
+	return 0;
+}
+
+
+static int
 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 				struct list_head *objects)
 {
@@ -781,9 +818,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 	struct change_domains cd;
 	int ret;
 
-	cd.invalidate_domains = 0;
-	cd.flush_domains = 0;
-	cd.flush_rings = 0;
+	memset(&cd, 0, sizeof(cd));
 	list_for_each_entry(obj, objects, exec_list)
 		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
 
@@ -796,6 +831,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 		return ret;
 	}
 
+	if (cd.flips) {
+		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
+		if (ret)
+			return ret;
+	}
+
 	list_for_each_entry(obj, objects, exec_list) {
 		ret = i915_gem_execbuffer_sync_rings(obj, ring);
 		if (ret)
@@ -842,47 +883,6 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 	return 0;
 }
 
-static int
-i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring,
-				   struct list_head *objects)
-{
-	struct drm_i915_gem_object *obj;
-	int flips;
-
-	/* Check for any pending flips. As we only maintain a flip queue depth
-	 * of 1, we can simply insert a WAIT for the next display flip prior
-	 * to executing the batch and avoid stalling the CPU.
-	 */
-	flips = 0;
-	list_for_each_entry(obj, objects, exec_list) {
-		if (obj->base.write_domain)
-			flips |= atomic_read(&obj->pending_flip);
-	}
-	if (flips) {
-		int plane, flip_mask, ret;
-
-		for (plane = 0; flips >> plane; plane++) {
-			if (((flips >> plane) & 1) == 0)
-				continue;
-
-			if (plane)
-				flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
-			else
-				flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
-
-			ret = intel_ring_begin(ring, 2);
-			if (ret)
-				return ret;
-
-			intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
-			intel_ring_emit(ring, MI_NOOP);
-			intel_ring_advance(ring);
-		}
-	}
-
-	return 0;
-}
-
 static void
 i915_gem_execbuffer_move_to_active(struct list_head *objects,
 				   struct intel_ring_buffer *ring,
@@ -1133,10 +1133,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (ret)
 		goto err;
 
-	ret = i915_gem_execbuffer_wait_for_flips(ring, &objects);
-	if (ret)
-		goto err;
-
 	seqno = i915_gem_next_request_seqno(ring);
 	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
 		if (seqno < ring->sync_seqno[i]) {