summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2015-01-22 08:42:00 -0500
committerJani Nikula <jani.nikula@intel.com>2015-02-09 13:03:15 -0500
commitf0a1fb10e5f79f5aaf8d7e94b9fa6bf2fa9aeebf (patch)
tree7631f5895d3be97bef2c09f4c9404d03733eb588 /drivers/gpu
parent0ca09685546fed5fc8f0535204f0626f352140f4 (diff)
drm/i915: Insert a command barrier on BLT/BSD cache flushes
This looked like an odd regression from

    commit ec5cc0f9b019af95e4571a9fa162d94294c8d90b
    Author: Chris Wilson <chris@chris-wilson.co.uk>
    Date:   Thu Jun 12 10:28:55 2014 +0100

        drm/i915: Restrict GPU boost to the RCS engine

but in reality it uncovered a much older coherency bug. The issue that
boosting the GPU frequency on the BCS ring was masking was that we could
wake the CPU up after completion of a BCS batch and inspect memory prior
to the write cache being fully evicted. In order to serialise the
breadcrumb interrupt (and so ensure that the CPU's view of memory is
coherent) we need to perform a post-sync operation in the MI_FLUSH_DW.

v2: Fix all the MI_FLUSH_DW (bsd plus the duplication in execlists).
Also fix the invalidate_domains mask in gen8_emit_flush() for
ring != VCS.

Testcase: gpuX-rcs-gpu-read-after-write
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
Acked-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/i915/intel_lrc.c20
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c23
2 files changed, 30 insertions, 13 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a94346fee160..0f358c5999ec 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1211,15 +1211,17 @@ static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
 
 	cmd = MI_FLUSH_DW + 1;
 
-	if (ring == &dev_priv->ring[VCS]) {
-		if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-			cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
-				MI_FLUSH_DW_STORE_INDEX |
-				MI_FLUSH_DW_OP_STOREDW;
-	} else {
-		if (invalidate_domains & I915_GEM_DOMAIN_RENDER)
-			cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
-				MI_FLUSH_DW_OP_STOREDW;
+	/* We always require a command barrier so that subsequent
+	 * commands, such as breadcrumb interrupts, are strictly ordered
+	 * wrt the contents of the write cache being flushed to memory
+	 * (and thus being coherent from the CPU).
+	 */
+	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+	if (invalidate_domains & I915_GEM_GPU_DOMAINS) {
+		cmd |= MI_INVALIDATE_TLB;
+		if (ring == &dev_priv->ring[VCS])
+			cmd |= MI_INVALIDATE_BSD;
 	}
 
 	intel_logical_ring_emit(ringbuf, cmd);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0bd3976d88e1..e5b3c6dbd467 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2240,6 +2240,14 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
 	cmd = MI_FLUSH_DW;
 	if (INTEL_INFO(ring->dev)->gen >= 8)
 		cmd += 1;
+
+	/* We always require a command barrier so that subsequent
+	 * commands, such as breadcrumb interrupts, are strictly ordered
+	 * wrt the contents of the write cache being flushed to memory
+	 * (and thus being coherent from the CPU).
+	 */
+	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
 	/*
 	 * Bspec vol 1c.5 - video engine command streamer:
 	 * "If ENABLED, all TLBs will be invalidated once the flush
@@ -2247,8 +2255,8 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
 	 * Post-Sync Operation field is a value of 1h or 3h."
 	 */
 	if (invalidate & I915_GEM_GPU_DOMAINS)
-		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
-			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+
 	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
 	if (INTEL_INFO(ring->dev)->gen >= 8) {
@@ -2344,6 +2352,14 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
 	cmd = MI_FLUSH_DW;
 	if (INTEL_INFO(ring->dev)->gen >= 8)
 		cmd += 1;
+
+	/* We always require a command barrier so that subsequent
+	 * commands, such as breadcrumb interrupts, are strictly ordered
+	 * wrt the contents of the write cache being flushed to memory
+	 * (and thus being coherent from the CPU).
+	 */
+	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
 	/*
 	 * Bspec vol 1c.3 - blitter engine command streamer:
 	 * "If ENABLED, all TLBs will be invalidated once the flush
@@ -2351,8 +2367,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
 	 * Post-Sync Operation field is a value of 1h or 3h."
 	 */
 	if (invalidate & I915_GEM_DOMAIN_RENDER)
-		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
-			MI_FLUSH_DW_OP_STOREDW;
+		cmd |= MI_INVALIDATE_TLB;
 	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
 	if (INTEL_INFO(ring->dev)->gen >= 8) {