aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
diff options
context:
space:
mode:
authorPaulo Zanoni <paulo.r.zanoni@intel.com>2012-08-17 17:35:43 -0400
committerDaniel Vetter <daniel.vetter@ffwll.ch>2012-09-03 04:09:26 -0400
commitf39876317a69a104eeaed002d4085348e871bfd1 (patch)
treef7fc8fe0fb361c8e68c5a25dcd45ed48f9508bef /drivers/gpu/drm/i915/intel_ringbuffer.c
parentb31115092724925a434905dc3dbf83a2e752ba4b (diff)
drm/i915: add workarounds to gen7_render_ring_flush
From Bspec, Vol 2a, Section 1.9.3.4 "PIPE_CONTROL", intro section detailing the various workarounds: "[DevIVB {W/A}, DevHSW {W/A}]: Pipe_control with CS-stall bit set must be issued before a pipe-control command that has the State Cache Invalidate bit set." Note that public Bspec has different numbering, it's Vol2Part1, Section 1.10.4.1 "PIPE_CONTROL" there. There's also a second workaround for the PIPE_CONTROL command itself: "[DevIVB, DevVLV, DevHSW] {WA}: Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with only read-cache-invalidate bit(s) set, must have a CS_STALL bit set" For simplicity we simply set the CS_STALL bit on every pipe_control on gen7+ Note that this massively helps on some hsw machines, together with the following patch to unconditionally set the CS_STALL bit on every pipe_control it prevents a gpu hang every few seconds. This is a regression that has been introduced in the pipe_control cleanup: commit 6c6cf5aa9c583478b19e23149feaa92d01fb8c2d Author: Chris Wilson <chris@chris-wilson.co.uk> Date: Fri Jul 20 18:02:28 2012 +0100 drm/i915: Only apply the SNB pipe control w/a to gen6 It looks like the massive snb pipe_control workaround also papered over any issues on ivb and hsw. Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com> [danvet: squashed both workarounds together, pimped commit message with Bspec citations, regression commit citation and changed the comment in the code a bit to clarify that we unconditionally set CS_STALL to avoid being hurt by trying to be clever.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c39
1 file changed, 34 insertions, 5 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 42a4b85b0eae..55cdb4d30a16 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -263,6 +263,25 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
263} 263}
264 264
265static int 265static int
266gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
267{
268 int ret;
269
270 ret = intel_ring_begin(ring, 4);
271 if (ret)
272 return ret;
273
274 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
275 intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
276 PIPE_CONTROL_STALL_AT_SCOREBOARD);
277 intel_ring_emit(ring, 0);
278 intel_ring_emit(ring, 0);
279 intel_ring_advance(ring);
280
281 return 0;
282}
283
284static int
266gen7_render_ring_flush(struct intel_ring_buffer *ring, 285gen7_render_ring_flush(struct intel_ring_buffer *ring,
267 u32 invalidate_domains, u32 flush_domains) 286 u32 invalidate_domains, u32 flush_domains)
268{ 287{
@@ -271,6 +290,16 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
271 u32 scratch_addr = pc->gtt_offset + 128; 290 u32 scratch_addr = pc->gtt_offset + 128;
272 int ret; 291 int ret;
273 292
293 /*
294 * Ensure that any following seqno writes only happen when the render
295 * cache is indeed flushed.
296 *
297 * Workaround: 4th PIPE_CONTROL command (except the ones with only
298 * read-cache invalidate bits set) must have the CS_STALL bit set. We
299 * don't try to be clever and just set it unconditionally.
300 */
301 flags |= PIPE_CONTROL_CS_STALL;
302
274 /* Just flush everything. Experiments have shown that reducing the 303 /* Just flush everything. Experiments have shown that reducing the
275 * number of bits based on the write domains has little performance 304 * number of bits based on the write domains has little performance
276 * impact. 305 * impact.
@@ -278,11 +307,6 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
278 if (flush_domains) { 307 if (flush_domains) {
279 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 308 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
280 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 309 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
281 /*
282 * Ensure that any following seqno writes only happen
283 * when the render cache is indeed flushed.
284 */
285 flags |= PIPE_CONTROL_CS_STALL;
286 } 310 }
287 if (invalidate_domains) { 311 if (invalidate_domains) {
288 flags |= PIPE_CONTROL_TLB_INVALIDATE; 312 flags |= PIPE_CONTROL_TLB_INVALIDATE;
@@ -295,6 +319,11 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
295 * TLB invalidate requires a post-sync write. 319 * TLB invalidate requires a post-sync write.
296 */ 320 */
297 flags |= PIPE_CONTROL_QW_WRITE; 321 flags |= PIPE_CONTROL_QW_WRITE;
322
323 /* Workaround: we must issue a pipe_control with CS-stall bit
324 * set before a pipe_control command that has the state cache
325 * invalidate bit set. */
326 gen7_render_ring_cs_stall_wa(ring);
298 } 327 }
299 328
300 ret = intel_ring_begin(ring, 4); 329 ret = intel_ring_begin(ring, 4);