aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2012-08-09 05:58:30 -0400
committerDaniel Vetter <daniel.vetter@ffwll.ch>2012-08-10 05:11:32 -0400
commitb2eadbc85b2c26df3fd2fe5c53c2a47cfd307249 (patch)
treee108c42b202147c9f3b1694065f17c5f7df1d84f
parent456470eb583f063ee84c6818251e638598be0fb8 (diff)
drm/i915: Lazily apply the SNB+ seqno w/a
Avoid the forcewake overhead when simply retiring requests, as often the last seen seqno is good enough to satisfy the retirement process and will be promptly re-run in any case. Only ensure that we force the coherent seqno read when we are explicitly waiting upon a completion event to be sure that none go missing, and also for when we are reporting seqno values in case of error or debugging. This greatly reduces the load for userspace using the busy-ioctl to track active buffers, for instance halving the CPU used by X in pushing the pixels from a software render (flash). The effect will be even more magnified with userptr and so providing a zero-copy upload path in that instance, or in similar instances where X is simply compositing DRI buffers. v2: Reverse the polarity of the tachyon stream. Daniel suggested that 'force' was too generic for the parameter name and that 'lazy_coherency' better encapsulated the semantics of it being an optimization and its purpose. Also notice that gen6_get_seqno() is only used by gen6/7 chipsets and so the test for IS_GEN6 || IS_GEN7 is redundant in that function. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c6
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c9
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c10
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.h9
5 files changed, 21 insertions, 15 deletions
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index ed4bc98095b1..0e8f14d04cda 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -391,7 +391,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
391{ 391{
392 if (ring->get_seqno) { 392 if (ring->get_seqno) {
393 seq_printf(m, "Current sequence (%s): %d\n", 393 seq_printf(m, "Current sequence (%s): %d\n",
394 ring->name, ring->get_seqno(ring)); 394 ring->name, ring->get_seqno(ring, false));
395 } 395 }
396} 396}
397 397
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c540321b42ba..051459324826 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1716,7 +1716,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
1716 1716
1717 WARN_ON(i915_verify_lists(ring->dev)); 1717 WARN_ON(i915_verify_lists(ring->dev));
1718 1718
1719 seqno = ring->get_seqno(ring); 1719 seqno = ring->get_seqno(ring, true);
1720 1720
1721 for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) 1721 for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
1722 if (seqno >= ring->sync_seqno[i]) 1722 if (seqno >= ring->sync_seqno[i])
@@ -1888,7 +1888,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
1888 bool wait_forever = true; 1888 bool wait_forever = true;
1889 int ret; 1889 int ret;
1890 1890
1891 if (i915_seqno_passed(ring->get_seqno(ring), seqno)) 1891 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1892 return 0; 1892 return 0;
1893 1893
1894 trace_i915_gem_request_wait_begin(ring, seqno); 1894 trace_i915_gem_request_wait_begin(ring, seqno);
@@ -1907,7 +1907,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
1907 getrawmonotonic(&before); 1907 getrawmonotonic(&before);
1908 1908
1909#define EXIT_COND \ 1909#define EXIT_COND \
1910 (i915_seqno_passed(ring->get_seqno(ring), seqno) || \ 1910 (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
1911 atomic_read(&dev_priv->mm.wedged)) 1911 atomic_read(&dev_priv->mm.wedged))
1912 do { 1912 do {
1913 if (interruptible) 1913 if (interruptible)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d15ea50f5854..0c37101934f8 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -347,7 +347,7 @@ static void notify_ring(struct drm_device *dev,
347 if (ring->obj == NULL) 347 if (ring->obj == NULL)
348 return; 348 return;
349 349
350 trace_i915_gem_request_complete(ring, ring->get_seqno(ring)); 350 trace_i915_gem_request_complete(ring, ring->get_seqno(ring, false));
351 351
352 wake_up_all(&ring->irq_queue); 352 wake_up_all(&ring->irq_queue);
353 if (i915_enable_hangcheck) { 353 if (i915_enable_hangcheck) {
@@ -1051,7 +1051,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
1051 if (!ring->get_seqno) 1051 if (!ring->get_seqno)
1052 return NULL; 1052 return NULL;
1053 1053
1054 seqno = ring->get_seqno(ring); 1054 seqno = ring->get_seqno(ring, false);
1055 list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) { 1055 list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
1056 if (obj->ring != ring) 1056 if (obj->ring != ring)
1057 continue; 1057 continue;
@@ -1105,7 +1105,7 @@ static void i915_record_ring_state(struct drm_device *dev,
1105 1105
1106 error->waiting[ring->id] = waitqueue_active(&ring->irq_queue); 1106 error->waiting[ring->id] = waitqueue_active(&ring->irq_queue);
1107 error->instpm[ring->id] = I915_READ(RING_INSTPM(ring->mmio_base)); 1107 error->instpm[ring->id] = I915_READ(RING_INSTPM(ring->mmio_base));
1108 error->seqno[ring->id] = ring->get_seqno(ring); 1108 error->seqno[ring->id] = ring->get_seqno(ring, false);
1109 error->acthd[ring->id] = intel_ring_get_active_head(ring); 1109 error->acthd[ring->id] = intel_ring_get_active_head(ring);
1110 error->head[ring->id] = I915_READ_HEAD(ring); 1110 error->head[ring->id] = I915_READ_HEAD(ring);
1111 error->tail[ring->id] = I915_READ_TAIL(ring); 1111 error->tail[ring->id] = I915_READ_TAIL(ring);
@@ -1602,7 +1602,8 @@ ring_last_seqno(struct intel_ring_buffer *ring)
1602static bool i915_hangcheck_ring_idle(struct intel_ring_buffer *ring, bool *err) 1602static bool i915_hangcheck_ring_idle(struct intel_ring_buffer *ring, bool *err)
1603{ 1603{
1604 if (list_empty(&ring->request_list) || 1604 if (list_empty(&ring->request_list) ||
1605 i915_seqno_passed(ring->get_seqno(ring), ring_last_seqno(ring))) { 1605 i915_seqno_passed(ring->get_seqno(ring, false),
1606 ring_last_seqno(ring))) {
1606 /* Issue a wake-up to catch stuck h/w. */ 1607 /* Issue a wake-up to catch stuck h/w. */
1607 if (waitqueue_active(&ring->irq_queue)) { 1608 if (waitqueue_active(&ring->irq_queue)) {
1608 DRM_ERROR("Hangcheck timer elapsed... %s idle\n", 1609 DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8733da529edf..e278675cdff9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -625,26 +625,24 @@ pc_render_add_request(struct intel_ring_buffer *ring,
625} 625}
626 626
627static u32 627static u32
628gen6_ring_get_seqno(struct intel_ring_buffer *ring) 628gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
629{ 629{
630 struct drm_device *dev = ring->dev;
631
632 /* Workaround to force correct ordering between irq and seqno writes on 630 /* Workaround to force correct ordering between irq and seqno writes on
633 * ivb (and maybe also on snb) by reading from a CS register (like 631 * ivb (and maybe also on snb) by reading from a CS register (like
634 * ACTHD) before reading the status page. */ 632 * ACTHD) before reading the status page. */
635 if (IS_GEN6(dev) || IS_GEN7(dev)) 633 if (!lazy_coherency)
636 intel_ring_get_active_head(ring); 634 intel_ring_get_active_head(ring);
637 return intel_read_status_page(ring, I915_GEM_HWS_INDEX); 635 return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
638} 636}
639 637
640static u32 638static u32
641ring_get_seqno(struct intel_ring_buffer *ring) 639ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
642{ 640{
643 return intel_read_status_page(ring, I915_GEM_HWS_INDEX); 641 return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
644} 642}
645 643
646static u32 644static u32
647pc_render_get_seqno(struct intel_ring_buffer *ring) 645pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
648{ 646{
649 struct pipe_control *pc = ring->private; 647 struct pipe_control *pc = ring->private;
650 return pc->cpu_page[0]; 648 return pc->cpu_page[0];
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 8b2b92e00e9d..2ea7a311a1f0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -72,7 +72,14 @@ struct intel_ring_buffer {
72 u32 flush_domains); 72 u32 flush_domains);
73 int (*add_request)(struct intel_ring_buffer *ring, 73 int (*add_request)(struct intel_ring_buffer *ring,
74 u32 *seqno); 74 u32 *seqno);
75 u32 (*get_seqno)(struct intel_ring_buffer *ring); 75 /* Some chipsets are not quite as coherent as advertised and need
76 * an expensive kick to force a true read of the up-to-date seqno.
77 * However, the up-to-date seqno is not always required and the last
78 * seen value is good enough. Note that the seqno will always be
79 * monotonic, even if not coherent.
80 */
81 u32 (*get_seqno)(struct intel_ring_buffer *ring,
82 bool lazy_coherency);
76 int (*dispatch_execbuffer)(struct intel_ring_buffer *ring, 83 int (*dispatch_execbuffer)(struct intel_ring_buffer *ring,
77 u32 offset, u32 length); 84 u32 offset, u32 length);
78 void (*cleanup)(struct intel_ring_buffer *ring); 85 void (*cleanup)(struct intel_ring_buffer *ring);