author     Jesse Barnes <jbarnes@virtuousgeek.org>    2011-10-16 04:23:31 -0400
committer  Keith Packard <keithp@keithp.com>          2011-10-20 18:26:41 -0400
commit     8d31528703ceda6f631e39953130abe9b3ca52b2 (patch)
tree       f64ff55c111adb9e479cad97ceede6174b824aa6 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent     9d971b37534fb268251f74cc04a36a0a16f7da04 (diff)
drm/i915: Use PIPE_CONTROL for flushing on gen6+.
v2 by danvet: Use a new flag to flush the render target cache on gen6+
(hw reuses the old write flush bit), as suggested by Ben Widawsky.
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
[danvet: this seems to fix cairo-perf-trace hangs on my snb]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Keith Packard <keithp@keithp.com>
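
For readers who want the shape of the change before reading the diff: all gen6+ render-ring flushes now go through PIPE_CONTROL, and the Sandy Bridge errata quoted in the patch's new comment block force every such flush to be preceded by two workaround packets. The standalone C sketch below models only that three-packet ordering. The WA_* names and bit positions, the emit_pipe_control() helper, and the 0x1000 scratch address are invented for illustration; the real flag values live in i915_reg.h and the real emission goes through intel_ring_emit().

#include <stdio.h>

/* Placeholder flag bits -- illustrative only; the real values are in
 * i915_reg.h. */
enum {
	WA_CS_STALL            = 1 << 0,
	WA_STALL_AT_SCOREBOARD = 1 << 1,
	WA_QW_WRITE            = 1 << 2, /* post-sync op: quad-word write */
	WA_FLUSH_EVERYTHING    = 1 << 3, /* stands in for the seven
					  * flush/invalidate flags */
};

/* Models one 5-dword PIPE_CONTROL: header, flags, address, two data
 * dwords. */
static void emit_pipe_control(unsigned flags, unsigned scratch_addr)
{
	printf("PIPE_CONTROL flags=%#x addr=%#x data={0,0}\n",
	       flags, scratch_addr);
}

/* The ordering the patch enforces on every gen6 render flush:
 * 1. CS stall + stall-at-scoreboard (required before the post-sync packet),
 * 2. post-sync quad-word write to the scratch page (required before any
 *    PIPE_CONTROL that enables cache flushes),
 * 3. the PIPE_CONTROL that actually flushes and invalidates the caches.
 */
static void gen6_flush_sequence(unsigned scratch_addr)
{
	emit_pipe_control(WA_CS_STALL | WA_STALL_AT_SCOREBOARD, scratch_addr);
	emit_pipe_control(WA_QW_WRITE, scratch_addr);
	emit_pipe_control(WA_FLUSH_EVERYTHING, scratch_addr);
}

int main(void)
{
	/* The patch writes to pc->gtt_offset + 128; 0x1000 is made up. */
	gen6_flush_sequence(0x1000 + 128);
	return 0;
}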
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
 -rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c | 136
 1 file changed, 124 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ca8363531a64..ca70e2f10445 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -34,6 +34,16 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+/*
+ * 965+ support PIPE_CONTROL commands, which provide finer grained control
+ * over cache flushing.
+ */
+struct pipe_control {
+	struct drm_i915_gem_object *obj;
+	volatile u32 *cpu_page;
+	u32 gtt_offset;
+};
+
 static inline int ring_space(struct intel_ring_buffer *ring)
 {
 	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
@@ -123,6 +133,118 @@ render_ring_flush(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+/**
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6.  From section 1.4.7.1
+ * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * And the workaround for these two requires this workaround first:
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ *     "1 of the following must also be set:
+ *      - Render Target Cache Flush Enable ([12] of DW1)
+ *      - Depth Cache Flush Enable ([0] of DW1)
+ *      - Stall at Pixel Scoreboard ([1] of DW1)
+ *      - Depth Stall ([13] of DW1)
+ *      - Post-Sync Operation ([13] of DW1)
+ *      - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it.  Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either.  Notify enable is IRQs, which aren't
+ * really our business.  That leaves only stall at scoreboard.
+ */
+static int
+intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
+{
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 128;
+	int ret;
+
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
+			PIPE_CONTROL_STALL_AT_SCOREBOARD);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+	intel_ring_emit(ring, 0); /* low dword */
+	intel_ring_emit(ring, 0); /* high dword */
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int
+gen6_render_ring_flush(struct intel_ring_buffer *ring,
+		       u32 invalidate_domains, u32 flush_domains)
+{
+	u32 flags = 0;
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 128;
+	int ret;
+
+	/* Force SNB workarounds for PIPE_CONTROL flushes */
+	intel_emit_post_sync_nonzero_flush(ring);
+
+	/* Just flush everything.  Experiments have shown that reducing the
+	 * number of bits based on the write domains has little performance
+	 * impact.
+	 */
+	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+	intel_ring_emit(ring, flags);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+	intel_ring_emit(ring, 0); /* lower dword */
+	intel_ring_emit(ring, 0); /* upper dword */
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
 static void ring_write_tail(struct intel_ring_buffer *ring,
 			    u32 value)
 {
@@ -206,16 +328,6 @@ static int init_ring_common(struct intel_ring_buffer *ring)
 	return 0;
 }
 
-/*
- * 965+ support PIPE_CONTROL commands, which provide finer grained control
- * over cache flushing.
- */
-struct pipe_control {
-	struct drm_i915_gem_object *obj;
-	volatile u32 *cpu_page;
-	u32 gtt_offset;
-};
-
 static int
 init_pipe_control(struct intel_ring_buffer *ring)
 {
@@ -296,8 +408,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 			   GFX_MODE_ENABLE(GFX_REPLAY_MODE));
 	}
 
-	if (INTEL_INFO(dev)->gen >= 6) {
-	} else if (IS_GEN5(dev)) {
+	if (INTEL_INFO(dev)->gen >= 5) {
 		ret = init_pipe_control(ring);
 		if (ret)
 			return ret;
@@ -1360,6 +1471,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	*ring = render_ring;
 	if (INTEL_INFO(dev)->gen >= 6) {
 		ring->add_request = gen6_add_request;
+		ring->flush = gen6_render_ring_flush;
 		ring->irq_get = gen6_render_ring_get_irq;
 		ring->irq_put = gen6_render_ring_put_irq;
 	} else if (IS_GEN5(dev)) {