author	Chris Wilson <chris@chris-wilson.co.uk>	2012-02-15 06:25:36 -0500
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2012-02-15 08:26:03 -0500
commit	a71d8d94525e8fd855c0466fb586ae1cb008f3a2 (patch)
tree	816d919b02d90a28b1c7b25d05deed73dca22774 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent	7c26e5c6edaec70f12984f7a3020864cc21e6fec (diff)
drm/i915: Record the tail at each request and use it to estimate the head
By recording the location of every request in the ringbuffer, we know that in order to retire the request the GPU must have finished reading it, and so the GPU head is now beyond the tail of the request. We can therefore provide a conservative estimate of where the GPU is reading from in order to avoid having to read back the ring buffer registers when polling for space upon starting a new write into the ringbuffer.

A secondary effect is that this allows us to convert intel_ring_buffer_wait() to use i915_wait_request() and so consolidate upon the single function to handle the complicated task of waiting upon the GPU. A necessary precaution is that we need to make that wait uninterruptible to match the existing conditions, as the callers of intel_ring_begin() have not been audited to handle ERESTARTSYS correctly.

By using a conservative estimate for the head, and by always processing all outstanding requests first, we prevent a race between using the estimate and direct reads of I915_RING_HEAD which could otherwise result in the value of the head going backwards and the tail overflowing once again. We are also careful to mark any request that we skip over (in order to free space in the ring) as consumed, which provides a self-consistency check.

Given sufficient abuse, such as a set of unthrottled GPU-bound cairo-traces, avoiding the use of I915_RING_HEAD gives a 10-20% boost on Sandy Bridge (i5-2520m):

  firefox-paintball  18927ms -> 15646ms: 1.21x speedup
  firefox-fishtank   12563ms -> 11278ms: 1.11x speedup

which is a mild consolation for the performance those traces achieved by exploiting the buggy autoreported head.

v2: Add a few more comments and make request->tail a conservative estimate, as suggested by Daniel Vetter.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
[danvet: resolve conflicts with retirement deferring and the lack of the autoreport head removal (that will go in through -fixes).]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
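To make the head-estimation arithmetic described above concrete, here is a minimal, stand-alone sketch (not part of the patch): the toy_ring structure, its field names and the sizes used are simplified assumptions, not the driver's real intel_ring_buffer.

	/*
	 * Illustrative sketch only: given the tail recorded for an
	 * already-retired request, the space available to a new write
	 * can be bounded without reading I915_RING_HEAD, because the
	 * GPU head must already be at or beyond that recorded tail.
	 */
	#include <stdio.h>

	struct toy_ring {
		unsigned int size;	/* total ring size in bytes */
		unsigned int tail;	/* where the CPU will write next */
	};

	/*
	 * Conservative free-space estimate.  The small pad (8 bytes,
	 * mirroring the patch) keeps head != tail when the ring would
	 * otherwise be exactly full.
	 */
	static int toy_ring_space(const struct toy_ring *ring,
				  unsigned int request_tail)
	{
		int space = (int)request_tail - (int)(ring->tail + 8);

		if (space < 0)
			space += ring->size;	/* handle wrap-around */
		return space;
	}

	int main(void)
	{
		struct toy_ring ring = { .size = 4096, .tail = 4000 };

		/* A retired request whose commands ended at offset 512. */
		printf("estimated space: %d bytes\n",
		       toy_ring_space(&ring, 512));
		return 0;
	}

Running this prints an estimate of 600 bytes: 512 - (4000 + 8) is negative, and adding ring.size back yields the wrapped distance, which is exactly the kind of conservative bound the patch uses in place of a register read.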
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.c	83
1 file changed, 83 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e784ebb8cc27..ca3972f2c6f5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -583,6 +583,7 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 128;
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
+
 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
 			PIPE_CONTROL_WRITE_FLUSH |
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
@@ -1107,11 +1108,89 @@ static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
 	return 0;
 }
 
+static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	bool was_interruptible;
+	int ret;
+
+	/* XXX As we have not yet audited all the paths to check that
+	 * they are ready for ERESTARTSYS from intel_ring_begin, do not
+	 * allow us to be interruptible by a signal.
+	 */
+	was_interruptible = dev_priv->mm.interruptible;
+	dev_priv->mm.interruptible = false;
+
+	ret = i915_wait_request(ring, seqno, true);
+
+	dev_priv->mm.interruptible = was_interruptible;
+
+	return ret;
+}
+
+static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
+{
+	struct drm_i915_gem_request *request;
+	u32 seqno = 0;
+	int ret;
+
+	i915_gem_retire_requests_ring(ring);
+
+	if (ring->last_retired_head != -1) {
+		ring->head = ring->last_retired_head;
+		ring->last_retired_head = -1;
+		ring->space = ring_space(ring);
+		if (ring->space >= n)
+			return 0;
+	}
+
+	list_for_each_entry(request, &ring->request_list, list) {
+		int space;
+
+		if (request->tail == -1)
+			continue;
+
+		space = request->tail - (ring->tail + 8);
+		if (space < 0)
+			space += ring->size;
+		if (space >= n) {
+			seqno = request->seqno;
+			break;
+		}
+
+		/* Consume this request in case we need more space than
+		 * is available and so need to prevent a race between
+		 * updating last_retired_head and direct reads of
+		 * I915_RING_HEAD. It also provides a nice sanity check.
+		 */
+		request->tail = -1;
+	}
+
+	if (seqno == 0)
+		return -ENOSPC;
+
+	ret = intel_ring_wait_seqno(ring, seqno);
+	if (ret)
+		return ret;
+
+	if (WARN_ON(ring->last_retired_head == -1))
+		return -ENOSPC;
+
+	ring->head = ring->last_retired_head;
+	ring->last_retired_head = -1;
+	ring->space = ring_space(ring);
+	if (WARN_ON(ring->space < n))
+		return -ENOSPC;
+
+	return 0;
+}
+
 int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long end;
+	int ret;
 	u32 head;
 
 	/* If the reported head position has wrapped or hasn't advanced,
@@ -1125,6 +1204,10 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
 		return 0;
 	}
 
+	ret = intel_ring_wait_request(ring, n);
+	if (ret != -ENOSPC)
+		return ret;
+
 	trace_i915_ring_wait_begin(ring);
 	if (drm_core_check_feature(dev, DRIVER_GEM))
 		/* With GEM the hangcheck timer should kick us out of the loop,