aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2018-12-28 10:31:14 -0500
committerChris Wilson <chris@chris-wilson.co.uk>2018-12-28 11:36:57 -0500
commitcaa5915bb3c1e61bd895383364011a8921fee053 (patch)
treeaa1fa823c6d7e024e998e3f58907e946072d35e2 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent6a6237293d0c02e0902b29a86e3e353e21f7bea6 (diff)
drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
In preparation for removing the manual EMIT_FLUSH prior to emitting the breadcrumb, implement the flush inline with writing the breadcrumb for ringbuffer emission. With a combined flush+breadcrumb, we can use a single operation to both flush and, after the flush is complete (post-sync), write the breadcrumb. This gives us a strongly ordered operation that should be sufficient to serialise the write before we emit the interrupt; and therefore we may take the opportunity to remove the irq_seqno_barrier w/a for gen6+. Although using the PIPECONTROL to write the breadcrumb is slower than MI_STORE_DWORD_IMM, by combining the operations into one and removing the extra flush (next patch) it is faster. For gen2-5, we simply combine the MI_FLUSH into the breadcrumb emission, though maybe we could find a solution here to the seqno-vs-interrupt issue on Ironlake by mixing up the flush? The answer is no, adding an MI_FLUSH before the interrupt is insufficient. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181228153114.4948-2-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c106
1 files changed, 98 insertions, 8 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 588294a3bbd2..fc1e29305951 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -217,7 +217,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
217 * really our business. That leaves only stall at scoreboard. 217 * really our business. That leaves only stall at scoreboard.
218 */ 218 */
219static int 219static int
220intel_emit_post_sync_nonzero_flush(struct i915_request *rq) 220gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
221{ 221{
222 u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; 222 u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
223 u32 *cs; 223 u32 *cs;
@@ -257,7 +257,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
257 int ret; 257 int ret;
258 258
259 /* Force SNB workarounds for PIPE_CONTROL flushes */ 259 /* Force SNB workarounds for PIPE_CONTROL flushes */
260 ret = intel_emit_post_sync_nonzero_flush(rq); 260 ret = gen6_emit_post_sync_nonzero_flush(rq);
261 if (ret) 261 if (ret)
262 return ret; 262 return ret;
263 263
@@ -300,6 +300,37 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
300 return 0; 300 return 0;
301} 301}
302 302
303static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
304{
305 /* First we do the gen6_emit_post_sync_nonzero_flush w/a */
306 *cs++ = GFX_OP_PIPE_CONTROL(4);
307 *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
308 *cs++ = 0;
309 *cs++ = 0;
310
311 *cs++ = GFX_OP_PIPE_CONTROL(4);
312 *cs++ = PIPE_CONTROL_QW_WRITE;
313 *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
314 *cs++ = 0;
315
316 /* Finally we can flush and with it emit the breadcrumb */
317 *cs++ = GFX_OP_PIPE_CONTROL(4);
318 *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
319 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
320 PIPE_CONTROL_DC_FLUSH_ENABLE |
321 PIPE_CONTROL_QW_WRITE |
322 PIPE_CONTROL_CS_STALL);
323 *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
324 *cs++ = rq->global_seqno;
325
326 *cs++ = MI_USER_INTERRUPT;
327 *cs++ = MI_NOOP;
328
329 rq->tail = intel_ring_offset(rq, cs);
330 assert_ring_tail_valid(rq->ring, rq->tail);
331}
332static const int gen6_rcs_emit_breadcrumb_sz = 14;
333
303static int 334static int
304gen7_render_ring_cs_stall_wa(struct i915_request *rq) 335gen7_render_ring_cs_stall_wa(struct i915_request *rq)
305{ 336{
@@ -379,6 +410,39 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
379 return 0; 410 return 0;
380} 411}
381 412
413static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
414{
415 *cs++ = GFX_OP_PIPE_CONTROL(4);
416 *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
417 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
418 PIPE_CONTROL_DC_FLUSH_ENABLE |
419 PIPE_CONTROL_FLUSH_ENABLE |
420 PIPE_CONTROL_QW_WRITE |
421 PIPE_CONTROL_GLOBAL_GTT_IVB |
422 PIPE_CONTROL_CS_STALL);
423 *cs++ = intel_hws_seqno_address(rq->engine);
424 *cs++ = rq->global_seqno;
425
426 *cs++ = MI_USER_INTERRUPT;
427 *cs++ = MI_NOOP;
428
429 rq->tail = intel_ring_offset(rq, cs);
430 assert_ring_tail_valid(rq->ring, rq->tail);
431}
432static const int gen7_rcs_emit_breadcrumb_sz = 6;
433
434static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
435{
436 *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
437 *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
438 *cs++ = rq->global_seqno;
439 *cs++ = MI_USER_INTERRUPT;
440
441 rq->tail = intel_ring_offset(rq, cs);
442 assert_ring_tail_valid(rq->ring, rq->tail);
443}
444static const int gen6_xcs_emit_breadcrumb_sz = 4;
445
382static void set_hwstam(struct intel_engine_cs *engine, u32 mask) 446static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
383{ 447{
384 /* 448 /*
@@ -777,16 +841,20 @@ static void i9xx_submit_request(struct i915_request *request)
777 841
778static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) 842static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
779{ 843{
844 *cs++ = MI_FLUSH;
845
780 *cs++ = MI_STORE_DWORD_INDEX; 846 *cs++ = MI_STORE_DWORD_INDEX;
781 *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; 847 *cs++ = I915_GEM_HWS_INDEX_ADDR;
782 *cs++ = rq->global_seqno; 848 *cs++ = rq->global_seqno;
849
783 *cs++ = MI_USER_INTERRUPT; 850 *cs++ = MI_USER_INTERRUPT;
851 *cs++ = MI_NOOP;
784 852
785 rq->tail = intel_ring_offset(rq, cs); 853 rq->tail = intel_ring_offset(rq, cs);
786 assert_ring_tail_valid(rq->ring, rq->tail); 854 assert_ring_tail_valid(rq->ring, rq->tail);
787} 855}
788 856
789static const int i9xx_emit_breadcrumb_sz = 4; 857static const int i9xx_emit_breadcrumb_sz = 6;
790 858
791static void 859static void
792gen5_seqno_barrier(struct intel_engine_cs *engine) 860gen5_seqno_barrier(struct intel_engine_cs *engine)
@@ -2090,7 +2158,6 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
2090 if (INTEL_GEN(dev_priv) >= 6) { 2158 if (INTEL_GEN(dev_priv) >= 6) {
2091 engine->irq_enable = gen6_irq_enable; 2159 engine->irq_enable = gen6_irq_enable;
2092 engine->irq_disable = gen6_irq_disable; 2160 engine->irq_disable = gen6_irq_disable;
2093 engine->irq_seqno_barrier = gen6_seqno_barrier;
2094 } else if (INTEL_GEN(dev_priv) >= 5) { 2161 } else if (INTEL_GEN(dev_priv) >= 5) {
2095 engine->irq_enable = gen5_irq_enable; 2162 engine->irq_enable = gen5_irq_enable;
2096 engine->irq_disable = gen5_irq_disable; 2163 engine->irq_disable = gen5_irq_disable;
@@ -2162,11 +2229,18 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
2162 2229
2163 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; 2230 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2164 2231
2165 if (INTEL_GEN(dev_priv) >= 6) { 2232 if (INTEL_GEN(dev_priv) >= 7) {
2166 engine->init_context = intel_rcs_ctx_init; 2233 engine->init_context = intel_rcs_ctx_init;
2167 engine->emit_flush = gen7_render_ring_flush; 2234 engine->emit_flush = gen7_render_ring_flush;
2168 if (IS_GEN(dev_priv, 6)) 2235 engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb;
2169 engine->emit_flush = gen6_render_ring_flush; 2236 engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz;
2237 engine->irq_seqno_barrier = gen6_seqno_barrier;
2238 } else if (IS_GEN(dev_priv, 6)) {
2239 engine->init_context = intel_rcs_ctx_init;
2240 engine->emit_flush = gen6_render_ring_flush;
2241 engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb;
2242 engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz;
2243 engine->irq_seqno_barrier = gen6_seqno_barrier;
2170 } else if (IS_GEN(dev_priv, 5)) { 2244 } else if (IS_GEN(dev_priv, 5)) {
2171 engine->emit_flush = gen4_render_ring_flush; 2245 engine->emit_flush = gen4_render_ring_flush;
2172 } else { 2246 } else {
@@ -2201,6 +2275,10 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
2201 engine->set_default_submission = gen6_bsd_set_default_submission; 2275 engine->set_default_submission = gen6_bsd_set_default_submission;
2202 engine->emit_flush = gen6_bsd_ring_flush; 2276 engine->emit_flush = gen6_bsd_ring_flush;
2203 engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; 2277 engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
2278
2279 engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
2280 engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
2281 engine->irq_seqno_barrier = gen6_seqno_barrier;
2204 } else { 2282 } else {
2205 engine->emit_flush = bsd_ring_flush; 2283 engine->emit_flush = bsd_ring_flush;
2206 if (IS_GEN(dev_priv, 5)) 2284 if (IS_GEN(dev_priv, 5))
@@ -2216,11 +2294,17 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
2216{ 2294{
2217 struct drm_i915_private *dev_priv = engine->i915; 2295 struct drm_i915_private *dev_priv = engine->i915;
2218 2296
2297 GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
2298
2219 intel_ring_default_vfuncs(dev_priv, engine); 2299 intel_ring_default_vfuncs(dev_priv, engine);
2220 2300
2221 engine->emit_flush = gen6_ring_flush; 2301 engine->emit_flush = gen6_ring_flush;
2222 engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; 2302 engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
2223 2303
2304 engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
2305 engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
2306 engine->irq_seqno_barrier = gen6_seqno_barrier;
2307
2224 return intel_init_ring_buffer(engine); 2308 return intel_init_ring_buffer(engine);
2225} 2309}
2226 2310
@@ -2228,6 +2312,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
2228{ 2312{
2229 struct drm_i915_private *dev_priv = engine->i915; 2313 struct drm_i915_private *dev_priv = engine->i915;
2230 2314
2315 GEM_BUG_ON(INTEL_GEN(dev_priv) < 7);
2316
2231 intel_ring_default_vfuncs(dev_priv, engine); 2317 intel_ring_default_vfuncs(dev_priv, engine);
2232 2318
2233 engine->emit_flush = gen6_ring_flush; 2319 engine->emit_flush = gen6_ring_flush;
@@ -2235,5 +2321,9 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
2235 engine->irq_enable = hsw_vebox_irq_enable; 2321 engine->irq_enable = hsw_vebox_irq_enable;
2236 engine->irq_disable = hsw_vebox_irq_disable; 2322 engine->irq_disable = hsw_vebox_irq_disable;
2237 2323
2324 engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
2325 engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
2326 engine->irq_seqno_barrier = gen6_seqno_barrier;
2327
2238 return intel_init_ring_buffer(engine); 2328 return intel_init_ring_buffer(engine);
2239} 2329}