diff options
author | Paulo Zanoni <paulo.r.zanoni@intel.com> | 2012-08-17 17:35:43 -0400 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2012-09-03 04:09:26 -0400 |
commit | f39876317a69a104eeaed002d4085348e871bfd1 (patch) | |
tree | f7fc8fe0fb361c8e68c5a25dcd45ed48f9508bef | |
parent | b31115092724925a434905dc3dbf83a2e752ba4b (diff) |
drm/i915: add workarounds to gen7_render_ring_flush
From Bspec, Vol 2a, Section 1.9.3.4 "PIPE_CONTROL", intro section
detailing the various workarounds:
"[DevIVB {W/A}, DevHSW {W/A}]: Pipe_control with CS-stall bit
set must be issued before a pipe-control command that has the State
Cache Invalidate bit set."
Note that public Bspec has different numbering, it's Vol2Part1,
Section 1.10.4.1 "PIPE_CONTROL" there.
There's also a second workaround for the PIPE_CONTROL command itself:
"[DevIVB, DevVLV, DevHSW] {WA}: Every 4th PIPE_CONTROL command, not
counting the PIPE_CONTROL with only read-cache-invalidate bit(s) set,
must have a CS_STALL bit set"
For simplicity we simply set the CS_STALL bit on every pipe_control on
gen7+
Note that this massively helps on some hsw machines, together with the
following patch to unconditionally set the CS_STALL bit on every
pipe_control it prevents a gpu hang every few seconds.
This is a regression that has been introduced in the pipe_control
cleanup:
commit 6c6cf5aa9c583478b19e23149feaa92d01fb8c2d
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri Jul 20 18:02:28 2012 +0100
drm/i915: Only apply the SNB pipe control w/a to gen6
It looks like the massive snb pipe_control workaround also papered
over any issues on ivb and hsw.
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
[danvet: squashed both workarounds together, pimped commit message
with Bspec citations, regression commit citation and changed the
comment in the code a bit to clarify that we unconditionally set
CS_STALL to avoid being hurt by trying to be clever.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.c | 39 |
1 file changed, 34 insertions, 5 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 42a4b85b0eae..55cdb4d30a16 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c | |||
@@ -263,6 +263,25 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring, | |||
263 | } | 263 | } |
264 | 264 | ||
265 | static int | 265 | static int |
266 | gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring) | ||
267 | { | ||
268 | int ret; | ||
269 | |||
270 | ret = intel_ring_begin(ring, 4); | ||
271 | if (ret) | ||
272 | return ret; | ||
273 | |||
274 | intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); | ||
275 | intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | | ||
276 | PIPE_CONTROL_STALL_AT_SCOREBOARD); | ||
277 | intel_ring_emit(ring, 0); | ||
278 | intel_ring_emit(ring, 0); | ||
279 | intel_ring_advance(ring); | ||
280 | |||
281 | return 0; | ||
282 | } | ||
283 | |||
284 | static int | ||
266 | gen7_render_ring_flush(struct intel_ring_buffer *ring, | 285 | gen7_render_ring_flush(struct intel_ring_buffer *ring, |
267 | u32 invalidate_domains, u32 flush_domains) | 286 | u32 invalidate_domains, u32 flush_domains) |
268 | { | 287 | { |
@@ -271,6 +290,16 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring, | |||
271 | u32 scratch_addr = pc->gtt_offset + 128; | 290 | u32 scratch_addr = pc->gtt_offset + 128; |
272 | int ret; | 291 | int ret; |
273 | 292 | ||
293 | /* | ||
294 | * Ensure that any following seqno writes only happen when the render | ||
295 | * cache is indeed flushed. | ||
296 | * | ||
297 | * Workaround: 4th PIPE_CONTROL command (except the ones with only | ||
298 | * read-cache invalidate bits set) must have the CS_STALL bit set. We | ||
299 | * don't try to be clever and just set it unconditionally. | ||
300 | */ | ||
301 | flags |= PIPE_CONTROL_CS_STALL; | ||
302 | |||
274 | /* Just flush everything. Experiments have shown that reducing the | 303 | /* Just flush everything. Experiments have shown that reducing the |
275 | * number of bits based on the write domains has little performance | 304 | * number of bits based on the write domains has little performance |
276 | * impact. | 305 | * impact. |
@@ -278,11 +307,6 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring, | |||
278 | if (flush_domains) { | 307 | if (flush_domains) { |
279 | flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; | 308 | flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; |
280 | flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; | 309 | flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; |
281 | /* | ||
282 | * Ensure that any following seqno writes only happen | ||
283 | * when the render cache is indeed flushed. | ||
284 | */ | ||
285 | flags |= PIPE_CONTROL_CS_STALL; | ||
286 | } | 310 | } |
287 | if (invalidate_domains) { | 311 | if (invalidate_domains) { |
288 | flags |= PIPE_CONTROL_TLB_INVALIDATE; | 312 | flags |= PIPE_CONTROL_TLB_INVALIDATE; |
@@ -295,6 +319,11 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring, | |||
295 | * TLB invalidate requires a post-sync write. | 319 | * TLB invalidate requires a post-sync write. |
296 | */ | 320 | */ |
297 | flags |= PIPE_CONTROL_QW_WRITE; | 321 | flags |= PIPE_CONTROL_QW_WRITE; |
322 | |||
323 | /* Workaround: we must issue a pipe_control with CS-stall bit | ||
324 | * set before a pipe_control command that has the state cache | ||
325 | * invalidate bit set. */ | ||
326 | gen7_render_ring_cs_stall_wa(ring); | ||
298 | } | 327 | } |
299 | 328 | ||
300 | ret = intel_ring_begin(ring, 4); | 329 | ret = intel_ring_begin(ring, 4); |