diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2018-11-05 04:43:05 -0500 |
---|---|---|
committer | Joonas Lahtinen <joonas.lahtinen@linux.intel.com> | 2018-11-12 10:07:12 -0500 |
commit | fb5bbae9b1333d44023713946fdd28db0cd85751 (patch) | |
tree | c1a1fdc3c6d7c886a43d704cadd1b685a714c34d | |
parent | ccda4af0f4b92f7b4c308d3acc262f4a7e3affad (diff) |
drm/i915/ringbuffer: Delay after EMIT_INVALIDATE for gen4/gen5
Exercising the gpu reloc path strenuously revealed an issue where the
updated relocations (from MI_STORE_DWORD_IMM) were not being observed
upon execution. After some experiments with adding pipecontrols (a lot
of pipecontrols (32) as gen4/5 do not have a bit to wait on earlier pipe
controls or even the current one), it was discovered that we merely
needed to delay the EMIT_INVALIDATE by several flushes. It is important
to note that, contrary to what one might first expect, it is the
EMIT_INVALIDATE and not the EMIT_FLUSH that needs the delay -- that is,
the delay is required for the TLB invalidation to take effect (one
presumes to purge any CS buffers), rather than being a delay after
flushing to ensure the writes have landed before triggering invalidation.
Testcase: igt/gem_tiled_fence_blits
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181105094305.5767-1-chris@chris-wilson.co.uk
(cherry picked from commit 55f99bf2a9c331838c981694bc872cd1ec4070b2)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.c | 38 |
1 files changed, 36 insertions, 2 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d0ef50bf930a..187bb0ceb4ac 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c | |||
@@ -91,6 +91,7 @@ static int | |||
91 | gen4_render_ring_flush(struct i915_request *rq, u32 mode) | 91 | gen4_render_ring_flush(struct i915_request *rq, u32 mode) |
92 | { | 92 | { |
93 | u32 cmd, *cs; | 93 | u32 cmd, *cs; |
94 | int i; | ||
94 | 95 | ||
95 | /* | 96 | /* |
96 | * read/write caches: | 97 | * read/write caches: |
@@ -127,12 +128,45 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) | |||
127 | cmd |= MI_INVALIDATE_ISP; | 128 | cmd |= MI_INVALIDATE_ISP; |
128 | } | 129 | } |
129 | 130 | ||
130 | cs = intel_ring_begin(rq, 2); | 131 | i = 2; |
132 | if (mode & EMIT_INVALIDATE) | ||
133 | i += 20; | ||
134 | |||
135 | cs = intel_ring_begin(rq, i); | ||
131 | if (IS_ERR(cs)) | 136 | if (IS_ERR(cs)) |
132 | return PTR_ERR(cs); | 137 | return PTR_ERR(cs); |
133 | 138 | ||
134 | *cs++ = cmd; | 139 | *cs++ = cmd; |
135 | *cs++ = MI_NOOP; | 140 | |
141 | /* | ||
142 | * A random delay to let the CS invalidate take effect? Without this | ||
143 | * delay, the GPU relocation path fails as the CS does not see | ||
144 | * the updated contents. Just as important, if we apply the flushes | ||
145 | * to the EMIT_FLUSH branch (i.e. immediately after the relocation | ||
146 | * write and before the invalidate on the next batch), the relocations | ||
147 | * still fail. This implies that it is a delay following invalidation | |
148 | * that is required to reset the caches as opposed to a delay to | ||
149 | * ensure the memory is written. | ||
150 | */ | ||
151 | if (mode & EMIT_INVALIDATE) { | ||
152 | *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; | ||
153 | *cs++ = i915_ggtt_offset(rq->engine->scratch) | | ||
154 | PIPE_CONTROL_GLOBAL_GTT; | ||
155 | *cs++ = 0; | ||
156 | *cs++ = 0; | ||
157 | |||
158 | for (i = 0; i < 12; i++) | ||
159 | *cs++ = MI_FLUSH; | ||
160 | |||
161 | *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; | ||
162 | *cs++ = i915_ggtt_offset(rq->engine->scratch) | | ||
163 | PIPE_CONTROL_GLOBAL_GTT; | ||
164 | *cs++ = 0; | ||
165 | *cs++ = 0; | ||
166 | } | ||
167 | |||
168 | *cs++ = cmd; | ||
169 | |||
136 | intel_ring_advance(rq, cs); | 170 | intel_ring_advance(rq, cs); |
137 | 171 | ||
138 | return 0; | 172 | return 0; |