aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2013-08-10 17:16:32 -0400
committer: Daniel Vetter <daniel.vetter@ffwll.ch> 2013-09-10 09:35:58 -0400
commit: 092467327c45edbfce6c2bb71ee842bec16b9a60 (patch)
tree: b63b390695439713d344a1df70613281a6e55187 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent: ad0d6dc4859ffb769768b64942b6a110e92acd21 (diff)
drm/i915: Write RING_TAIL once per-request
Ignoring the legacy DRI1 code, and a couple of special cases (to be discussed later), all access to the ring is mediated through requests. The first write to a ring will grab a seqno and mark the ring as having an outstanding_lazy_request. Either through explicitly adding a request after an execbuffer or through an implicit wait (either by the CPU or by a semaphore), that sequence of writes will be terminated with a request. So we can elide all the intervening writes to the tail register and send the entire command stream to the GPU at once. This will reduce the number of *serialising* writes to the tail register by a factor of 3-5 (depending upon architecture and number of workarounds, context switches, etc. involved). This becomes even more noticeable when the register write is overloaded with a number of debugging tools. The astute reader will wonder if it is then possible to overflow the ring with a single command. It is not. When we start a command sequence to the ring, we check for available space and issue a wait if there is not enough. The ring wait will in this case be forced to flush the outstanding register write and then poll the ACTHD for sufficient space to continue. The exceptions to the rule that everything is inside a request are a few initialisation cases where we may want to write GPU commands via the CS before userspace wakes up and page flips. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c30
1 files changed, 16 insertions, 14 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 284afaf5d6ff..686e5b23481d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -41,6 +41,16 @@ static inline int ring_space(struct intel_ring_buffer *ring)
41 return space; 41 return space;
42} 42}
43 43
44void __intel_ring_advance(struct intel_ring_buffer *ring)
45{
46 struct drm_i915_private *dev_priv = ring->dev->dev_private;
47
48 ring->tail &= ring->size - 1;
49 if (dev_priv->gpu_error.stop_rings & intel_ring_flag(ring))
50 return;
51 ring->write_tail(ring, ring->tail);
52}
53
44static int 54static int
45gen2_render_ring_flush(struct intel_ring_buffer *ring, 55gen2_render_ring_flush(struct intel_ring_buffer *ring,
46 u32 invalidate_domains, 56 u32 invalidate_domains,
@@ -631,7 +641,7 @@ gen6_add_request(struct intel_ring_buffer *ring)
631 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); 641 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
632 intel_ring_emit(ring, ring->outstanding_lazy_seqno); 642 intel_ring_emit(ring, ring->outstanding_lazy_seqno);
633 intel_ring_emit(ring, MI_USER_INTERRUPT); 643 intel_ring_emit(ring, MI_USER_INTERRUPT);
634 intel_ring_advance(ring); 644 __intel_ring_advance(ring);
635 645
636 return 0; 646 return 0;
637} 647}
@@ -744,7 +754,7 @@ pc_render_add_request(struct intel_ring_buffer *ring)
744 intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); 754 intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
745 intel_ring_emit(ring, ring->outstanding_lazy_seqno); 755 intel_ring_emit(ring, ring->outstanding_lazy_seqno);
746 intel_ring_emit(ring, 0); 756 intel_ring_emit(ring, 0);
747 intel_ring_advance(ring); 757 __intel_ring_advance(ring);
748 758
749 return 0; 759 return 0;
750} 760}
@@ -965,7 +975,7 @@ i9xx_add_request(struct intel_ring_buffer *ring)
965 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); 975 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
966 intel_ring_emit(ring, ring->outstanding_lazy_seqno); 976 intel_ring_emit(ring, ring->outstanding_lazy_seqno);
967 intel_ring_emit(ring, MI_USER_INTERRUPT); 977 intel_ring_emit(ring, MI_USER_INTERRUPT);
968 intel_ring_advance(ring); 978 __intel_ring_advance(ring);
969 979
970 return 0; 980 return 0;
971} 981}
@@ -1414,6 +1424,9 @@ static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
1414 if (ret != -ENOSPC) 1424 if (ret != -ENOSPC)
1415 return ret; 1425 return ret;
1416 1426
1427 /* force the tail write in case we have been skipping them */
1428 __intel_ring_advance(ring);
1429
1417 trace_i915_ring_wait_begin(ring); 1430 trace_i915_ring_wait_begin(ring);
1418 /* With GEM the hangcheck timer should kick us out of the loop, 1431 /* With GEM the hangcheck timer should kick us out of the loop,
1419 * leaving it early runs the risk of corrupting GEM state (due 1432 * leaving it early runs the risk of corrupting GEM state (due
@@ -1568,17 +1581,6 @@ void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno)
1568 ring->hangcheck.seqno = seqno; 1581 ring->hangcheck.seqno = seqno;
1569} 1582}
1570 1583
1571void intel_ring_advance(struct intel_ring_buffer *ring)
1572{
1573 struct drm_i915_private *dev_priv = ring->dev->dev_private;
1574
1575 ring->tail &= ring->size - 1;
1576 if (dev_priv->gpu_error.stop_rings & intel_ring_flag(ring))
1577 return;
1578 ring->write_tail(ring, ring->tail);
1579}
1580
1581
1582static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring, 1584static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
1583 u32 value) 1585 u32 value)
1584{ 1586{