author     Daniel Vetter <daniel.vetter@ffwll.ch>   2012-06-13 14:45:19 -0400
committer  Daniel Vetter <daniel.vetter@ffwll.ch>   2012-06-20 07:54:28 -0400
commit     cc889e0f6ce6a63c62db17d702ecfed86d58083f (patch)
tree       a3e1154ba534b842ca428ea257f27290ece81e7b /drivers
parent     8e88a2bd5987178d16d53686197404e149e996d9 (diff)
drm/i915: disable flushing_list/gpu_write_list
This is just the minimal patch to disable all this code so that we can
do decent amounts of QA before we rip it all out.

The complicating thing is that we need to flush the gpu caches after
the batchbuffer is emitted, which is past the point of no return where
execbuffer can't fail any more (otherwise we risk submitting the same
batch multiple times). Hence we need to add a flag to track whether
any caches associated with that ring are dirty, and emit the flush in
add_request if that's the case.

Note that this has quite a few behaviour changes:
- Caches get flushed/invalidated unconditionally.
- Invalidation now happens after potential inter-ring sync.

I've bantered around a bit with Chris on irc whether this fixes
anything, and it might or might not. The only thing clear is that with
these changes it's much easier to reason about correctness.

Also rip out a lone get_next_request_seqno in the execbuffer
retire_commands function. I've dug around and I couldn't figure out
why it is still there; with the outstanding lazy request stuff it
shouldn't be necessary.

v2: Chris Wilson complained that I also invalidate the read caches
when flushing after a batchbuffer. Now optimized.

v3: Added some comments to explain the new flushing behaviour.

Cc: Eric Anholt <eric@anholt.net>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
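In outline, the scheme works like this: execbuffer marks the ring's
caches dirty after emitting the batch, and add_request emits any
pending flush before emitting the request. The following stand-alone C
sketch models that flow; struct ring, flush_ring(), retire_commands()
and add_request() are simplified illustrative stand-ins for the kernel
code in the diff below, with locking, seqnos and the real error paths
elided.

	/* Toy model of the gpu_caches_dirty scheme - not kernel code. */
	#include <stdbool.h>
	#include <stdio.h>

	struct ring {
		const char *name;
		bool gpu_caches_dirty;
	};

	/* Stand-in for i915_gem_flush_ring(); always succeeds here. */
	static int flush_ring(struct ring *ring)
	{
		printf("%s: emit gpu cache flush\n", ring->name);
		return 0;
	}

	/*
	 * Stand-in for i915_gem_execbuffer_retire_commands(): past the
	 * point of no return after the batch, so only mark the caches
	 * dirty instead of emitting a flush that could fail.
	 */
	static void retire_commands(struct ring *ring)
	{
		ring->gpu_caches_dirty = true;
	}

	/*
	 * Stand-in for i915_add_request(): the deferred flush must land
	 * before the request, no matter what, so emit it here if the
	 * ring is dirty.
	 */
	static int add_request(struct ring *ring)
	{
		if (ring->gpu_caches_dirty) {
			int ret = flush_ring(ring);
			if (ret)
				return ret;
			ring->gpu_caches_dirty = false;
		}
		printf("%s: emit request\n", ring->name);
		return 0;
	}

	int main(void)
	{
		struct ring render = { .name = "render" };

		retire_commands(&render); /* batch emitted, caches dirty */
		return add_request(&render); /* flush, then the request */
	}

Because the flag stays set when the flush fails, a later add_request
retries it; this is what lets the post-batch path never fail.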
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c             25
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c  52
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.h      1
3 files changed, 33 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3596f3415624..6a98c0659324 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1568,6 +1568,21 @@ i915_add_request(struct intel_ring_buffer *ring,
 	int was_empty;
 	int ret;
 
+	/*
+	 * Emit any outstanding flushes - execbuf can fail to emit the flush
+	 * after having emitted the batchbuffer command. Hence we need to fix
+	 * things up similar to emitting the lazy request. The difference here
+	 * is that the flush _must_ happen before the next request, no matter
+	 * what.
+	 */
+	if (ring->gpu_caches_dirty) {
+		ret = i915_gem_flush_ring(ring, 0, I915_GEM_GPU_DOMAINS);
+		if (ret)
+			return ret;
+
+		ring->gpu_caches_dirty = false;
+	}
+
 	BUG_ON(request == NULL);
 	seqno = i915_gem_next_request_seqno(ring);
 
@@ -1613,6 +1628,9 @@ i915_add_request(struct intel_ring_buffer *ring,
 		queue_delayed_work(dev_priv->wq,
 				   &dev_priv->mm.retire_work, HZ);
 	}
+
+	WARN_ON(!list_empty(&ring->gpu_write_list));
+
 	return 0;
 }
 
@@ -1827,14 +1845,11 @@ i915_gem_retire_work_handler(struct work_struct *work)
 	 */
 	idle = true;
 	for_each_ring(ring, dev_priv, i) {
-		if (!list_empty(&ring->gpu_write_list)) {
+		if (ring->gpu_caches_dirty) {
 			struct drm_i915_gem_request *request;
-			int ret;
 
-			ret = i915_gem_flush_ring(ring,
-						  0, I915_GEM_GPU_DOMAINS);
 			request = kzalloc(sizeof(*request), GFP_KERNEL);
-			if (ret || request == NULL ||
+			if (request == NULL ||
 			    i915_add_request(ring, NULL, request))
 				kfree(request);
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f32d02464bce..88e2e114189c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -810,33 +810,16 @@ err:
 	return ret;
 }
 
-static int
+static void
 i915_gem_execbuffer_flush(struct drm_device *dev,
 			  uint32_t invalidate_domains,
-			  uint32_t flush_domains,
-			  uint32_t flush_rings)
+			  uint32_t flush_domains)
 {
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	int i, ret;
-
 	if (flush_domains & I915_GEM_DOMAIN_CPU)
 		intel_gtt_chipset_flush();
 
 	if (flush_domains & I915_GEM_DOMAIN_GTT)
 		wmb();
-
-	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
-		for (i = 0; i < I915_NUM_RINGS; i++)
-			if (flush_rings & (1 << i)) {
-				ret = i915_gem_flush_ring(&dev_priv->ring[i],
-							  invalidate_domains,
-							  flush_domains);
-				if (ret)
-					return ret;
-			}
-	}
-
-	return 0;
 }
 
 static int
@@ -885,12 +868,9 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
 
 	if (cd.invalidate_domains | cd.flush_domains) {
-		ret = i915_gem_execbuffer_flush(ring->dev,
-						cd.invalidate_domains,
-						cd.flush_domains,
-						cd.flush_rings);
-		if (ret)
-			return ret;
+		i915_gem_execbuffer_flush(ring->dev,
+					  cd.invalidate_domains,
+					  cd.flush_domains);
 	}
 
 	if (cd.flips) {
@@ -905,6 +885,11 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 			return ret;
 	}
 
+	/* Unconditionally invalidate gpu caches. */
+	ret = i915_gem_flush_ring(ring, I915_GEM_GPU_DOMAINS, 0);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -983,26 +968,13 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 				    struct intel_ring_buffer *ring)
 {
 	struct drm_i915_gem_request *request;
-	u32 invalidate;
 
-	/*
-	 * Ensure that the commands in the batch buffer are
-	 * finished before the interrupt fires.
-	 *
-	 * The sampler always gets flushed on i965 (sigh).
-	 */
-	invalidate = I915_GEM_DOMAIN_COMMAND;
-	if (INTEL_INFO(dev)->gen >= 4)
-		invalidate |= I915_GEM_DOMAIN_SAMPLER;
-	if (ring->flush(ring, invalidate, 0)) {
-		i915_gem_next_request_seqno(ring);
-		return;
-	}
+	/* Unconditionally force add_request to emit a full flush. */
+	ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
 	request = kzalloc(sizeof(*request), GFP_KERNEL);
 	if (request == NULL || i915_add_request(ring, file, request)) {
-		i915_gem_next_request_seqno(ring);
 		kfree(request);
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 594c9c4ad396..1d3c81fdad92 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -113,6 +113,7 @@ struct intel_ring_buffer {
 	 * Do we have some not yet emitted requests outstanding?
 	 */
 	u32 outstanding_lazy_request;
+	bool gpu_caches_dirty;
 
 	wait_queue_head_t irq_queue;
 