aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_ringbuffer.h
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2012-02-15 06:25:36 -0500
committer Daniel Vetter <daniel.vetter@ffwll.ch> 2012-02-15 08:26:03 -0500
commit a71d8d94525e8fd855c0466fb586ae1cb008f3a2 (patch)
tree 816d919b02d90a28b1c7b25d05deed73dca22774 /drivers/gpu/drm/i915/intel_ringbuffer.h
parent 7c26e5c6edaec70f12984f7a3020864cc21e6fec (diff)
drm/i915: Record the tail at each request and use it to estimate the head
By recording the location of every request in the ringbuffer, we know that in order to retire the request the GPU must have finished reading it and so the GPU head is now beyond the tail of the request. We can therefore provide a conservative estimate of where the GPU is reading from in order to avoid having to read back the ring buffer registers when polling for space upon starting a new write into the ringbuffer.

A secondary effect is that this allows us to convert intel_ring_buffer_wait() to use i915_wait_request() and so consolidate upon the single function to handle the complicated task of waiting upon the GPU. A necessary precaution is that we need to make that wait uninterruptible to match the existing conditions, as not all the callers of intel_ring_begin() have been audited to handle ERESTARTSYS correctly.

By using a conservative estimate for the head, and always processing all outstanding requests first, we prevent a race condition between using the estimate and direct reads of I915_RING_HEAD which could result in the value of the head going backwards, and the tail overflowing once again. We are also careful to mark any request that we skip over in order to free space in the ring as consumed, which provides a self-consistency check.

Given sufficient abuse, such as a set of unthrottled GPU bound cairo-traces, avoiding the use of I915_RING_HEAD gives a 10-20% boost on Sandy Bridge (i5-2520m):

  firefox-paintball 18927ms -> 15646ms: 1.21x speedup
  firefox-fishtank 12563ms -> 11278ms: 1.11x speedup

which is a mild consolation for the performance those traces achieved from exploiting the buggy autoreported head.

v2: Add a few more comments and make request->tail a conservative estimate as suggested by Daniel Vetter.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
[danvet: resolve conflicts with retirement deferring and the lack of the autoreport head removal (that will go in through -fixes).]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h')
-rw-r--r-- drivers/gpu/drm/i915/intel_ringbuffer.h | 15
1 files changed, 15 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c8b9cc0cd0dc..bc0365b8fa4d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -46,6 +46,16 @@ struct intel_ring_buffer {
46 int effective_size; 46 int effective_size;
47 struct intel_hw_status_page status_page; 47 struct intel_hw_status_page status_page;
48 48
49 /** We track the position of the requests in the ring buffer, and
50 * when each is retired we increment last_retired_head as the GPU
51 * must have finished processing the request and so we know we
52 * can advance the ringbuffer up to that position.
53 *
54 * last_retired_head is set to -1 after the value is consumed so
55 * we can detect new retirements.
56 */
57 u32 last_retired_head;
58
49 spinlock_t irq_lock; 59 spinlock_t irq_lock;
50 u32 irq_refcount; 60 u32 irq_refcount;
51 u32 irq_mask; 61 u32 irq_mask;
@@ -193,6 +203,11 @@ int intel_init_blt_ring_buffer(struct drm_device *dev);
193u32 intel_ring_get_active_head(struct intel_ring_buffer *ring); 203u32 intel_ring_get_active_head(struct intel_ring_buffer *ring);
194void intel_ring_setup_status_page(struct intel_ring_buffer *ring); 204void intel_ring_setup_status_page(struct intel_ring_buffer *ring);
195 205
/** Accessor for the ring's tail field: returns the current value of ring->tail. */
206static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring)
207{
208 return ring->tail;
209}
210
196static inline void i915_trace_irq_get(struct intel_ring_buffer *ring, u32 seqno) 211static inline void i915_trace_irq_get(struct intel_ring_buffer *ring, u32 seqno)
197{ 212{
198 if (ring->trace_irq_seqno == 0 && ring->irq_get(ring)) 213 if (ring->trace_irq_seqno == 0 && ring->irq_get(ring))