diff options
| author | Chris Wilson <chris@chris-wilson.co.uk> | 2018-12-28 10:31:14 -0500 |
|---|---|---|
| committer | Chris Wilson <chris@chris-wilson.co.uk> | 2018-12-28 11:36:57 -0500 |
| commit | caa5915bb3c1e61bd895383364011a8921fee053 (patch) | |
| tree | aa1fa823c6d7e024e998e3f58907e946072d35e2 /drivers/gpu/drm/i915/intel_ringbuffer.c | |
| parent | 6a6237293d0c02e0902b29a86e3e353e21f7bea6 (diff) | |
drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
In preparation for removing the manual EMIT_FLUSH prior to emitting the
breadcrumb, implement the flush inline with writing the breadcrumb for
ringbuffer emission.
With a combined flush+breadcrumb, we can use a single operation to both
flush and, after the flush is complete (post-sync), write the breadcrumb.
This gives us a strongly ordered operation that should be sufficient to
serialise the write before we emit the interrupt; and therefore we may
take the opportunity to remove the irq_seqno_barrier w/a for gen6+.
Although using the PIPECONTROL to write the breadcrumb is slower than
MI_STORE_DWORD_IMM, by combining the operations into one and removing the
extra flush (next patch) it is faster.
For gen2-5, we simply combine the MI_FLUSH into the breadcrumb emission,
though maybe we could find a solution here to the seqno-vs-interrupt
issue on Ironlake by mixing up the flush? The answer is no, adding an
MI_FLUSH before the interrupt is insufficient.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181228153114.4948-2-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
| -rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.c | 106 |
1 files changed, 98 insertions, 8 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 588294a3bbd2..fc1e29305951 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c | |||
| @@ -217,7 +217,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) | |||
| 217 | * really our business. That leaves only stall at scoreboard. | 217 | * really our business. That leaves only stall at scoreboard. |
| 218 | */ | 218 | */ |
| 219 | static int | 219 | static int |
| 220 | intel_emit_post_sync_nonzero_flush(struct i915_request *rq) | 220 | gen6_emit_post_sync_nonzero_flush(struct i915_request *rq) |
| 221 | { | 221 | { |
| 222 | u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; | 222 | u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; |
| 223 | u32 *cs; | 223 | u32 *cs; |
| @@ -257,7 +257,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode) | |||
| 257 | int ret; | 257 | int ret; |
| 258 | 258 | ||
| 259 | /* Force SNB workarounds for PIPE_CONTROL flushes */ | 259 | /* Force SNB workarounds for PIPE_CONTROL flushes */ |
| 260 | ret = intel_emit_post_sync_nonzero_flush(rq); | 260 | ret = gen6_emit_post_sync_nonzero_flush(rq); |
| 261 | if (ret) | 261 | if (ret) |
| 262 | return ret; | 262 | return ret; |
| 263 | 263 | ||
| @@ -300,6 +300,37 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode) | |||
| 300 | return 0; | 300 | return 0; |
| 301 | } | 301 | } |
| 302 | 302 | ||
| 303 | static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) | ||
| 304 | { | ||
| 305 | /* First we do the gen6_emit_post_sync_nonzero_flush w/a */ | ||
| 306 | *cs++ = GFX_OP_PIPE_CONTROL(4); | ||
| 307 | *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; | ||
| 308 | *cs++ = 0; | ||
| 309 | *cs++ = 0; | ||
| 310 | |||
| 311 | *cs++ = GFX_OP_PIPE_CONTROL(4); | ||
| 312 | *cs++ = PIPE_CONTROL_QW_WRITE; | ||
| 313 | *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; | ||
| 314 | *cs++ = 0; | ||
| 315 | |||
| 316 | /* Finally we can flush and with it emit the breadcrumb */ | ||
| 317 | *cs++ = GFX_OP_PIPE_CONTROL(4); | ||
| 318 | *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | | ||
| 319 | PIPE_CONTROL_DEPTH_CACHE_FLUSH | | ||
| 320 | PIPE_CONTROL_DC_FLUSH_ENABLE | | ||
| 321 | PIPE_CONTROL_QW_WRITE | | ||
| 322 | PIPE_CONTROL_CS_STALL); | ||
| 323 | *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; | ||
| 324 | *cs++ = rq->global_seqno; | ||
| 325 | |||
| 326 | *cs++ = MI_USER_INTERRUPT; | ||
| 327 | *cs++ = MI_NOOP; | ||
| 328 | |||
| 329 | rq->tail = intel_ring_offset(rq, cs); | ||
| 330 | assert_ring_tail_valid(rq->ring, rq->tail); | ||
| 331 | } | ||
| 332 | static const int gen6_rcs_emit_breadcrumb_sz = 14; | ||
| 333 | |||
| 303 | static int | 334 | static int |
| 304 | gen7_render_ring_cs_stall_wa(struct i915_request *rq) | 335 | gen7_render_ring_cs_stall_wa(struct i915_request *rq) |
| 305 | { | 336 | { |
| @@ -379,6 +410,39 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) | |||
| 379 | return 0; | 410 | return 0; |
| 380 | } | 411 | } |
| 381 | 412 | ||
| 413 | static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) | ||
| 414 | { | ||
| 415 | *cs++ = GFX_OP_PIPE_CONTROL(4); | ||
| 416 | *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | | ||
| 417 | PIPE_CONTROL_DEPTH_CACHE_FLUSH | | ||
| 418 | PIPE_CONTROL_DC_FLUSH_ENABLE | | ||
| 419 | PIPE_CONTROL_FLUSH_ENABLE | | ||
| 420 | PIPE_CONTROL_QW_WRITE | | ||
| 421 | PIPE_CONTROL_GLOBAL_GTT_IVB | | ||
| 422 | PIPE_CONTROL_CS_STALL); | ||
| 423 | *cs++ = intel_hws_seqno_address(rq->engine); | ||
| 424 | *cs++ = rq->global_seqno; | ||
| 425 | |||
| 426 | *cs++ = MI_USER_INTERRUPT; | ||
| 427 | *cs++ = MI_NOOP; | ||
| 428 | |||
| 429 | rq->tail = intel_ring_offset(rq, cs); | ||
| 430 | assert_ring_tail_valid(rq->ring, rq->tail); | ||
| 431 | } | ||
| 432 | static const int gen7_rcs_emit_breadcrumb_sz = 6; | ||
| 433 | |||
| 434 | static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) | ||
| 435 | { | ||
| 436 | *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; | ||
| 437 | *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; | ||
| 438 | *cs++ = rq->global_seqno; | ||
| 439 | *cs++ = MI_USER_INTERRUPT; | ||
| 440 | |||
| 441 | rq->tail = intel_ring_offset(rq, cs); | ||
| 442 | assert_ring_tail_valid(rq->ring, rq->tail); | ||
| 443 | } | ||
| 444 | static const int gen6_xcs_emit_breadcrumb_sz = 4; | ||
| 445 | |||
| 382 | static void set_hwstam(struct intel_engine_cs *engine, u32 mask) | 446 | static void set_hwstam(struct intel_engine_cs *engine, u32 mask) |
| 383 | { | 447 | { |
| 384 | /* | 448 | /* |
| @@ -777,16 +841,20 @@ static void i9xx_submit_request(struct i915_request *request) | |||
| 777 | 841 | ||
| 778 | static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) | 842 | static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) |
| 779 | { | 843 | { |
| 844 | *cs++ = MI_FLUSH; | ||
| 845 | |||
| 780 | *cs++ = MI_STORE_DWORD_INDEX; | 846 | *cs++ = MI_STORE_DWORD_INDEX; |
| 781 | *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; | 847 | *cs++ = I915_GEM_HWS_INDEX_ADDR; |
| 782 | *cs++ = rq->global_seqno; | 848 | *cs++ = rq->global_seqno; |
| 849 | |||
| 783 | *cs++ = MI_USER_INTERRUPT; | 850 | *cs++ = MI_USER_INTERRUPT; |
| 851 | *cs++ = MI_NOOP; | ||
| 784 | 852 | ||
| 785 | rq->tail = intel_ring_offset(rq, cs); | 853 | rq->tail = intel_ring_offset(rq, cs); |
| 786 | assert_ring_tail_valid(rq->ring, rq->tail); | 854 | assert_ring_tail_valid(rq->ring, rq->tail); |
| 787 | } | 855 | } |
| 788 | 856 | ||
| 789 | static const int i9xx_emit_breadcrumb_sz = 4; | 857 | static const int i9xx_emit_breadcrumb_sz = 6; |
| 790 | 858 | ||
| 791 | static void | 859 | static void |
| 792 | gen5_seqno_barrier(struct intel_engine_cs *engine) | 860 | gen5_seqno_barrier(struct intel_engine_cs *engine) |
| @@ -2090,7 +2158,6 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv, | |||
| 2090 | if (INTEL_GEN(dev_priv) >= 6) { | 2158 | if (INTEL_GEN(dev_priv) >= 6) { |
| 2091 | engine->irq_enable = gen6_irq_enable; | 2159 | engine->irq_enable = gen6_irq_enable; |
| 2092 | engine->irq_disable = gen6_irq_disable; | 2160 | engine->irq_disable = gen6_irq_disable; |
| 2093 | engine->irq_seqno_barrier = gen6_seqno_barrier; | ||
| 2094 | } else if (INTEL_GEN(dev_priv) >= 5) { | 2161 | } else if (INTEL_GEN(dev_priv) >= 5) { |
| 2095 | engine->irq_enable = gen5_irq_enable; | 2162 | engine->irq_enable = gen5_irq_enable; |
| 2096 | engine->irq_disable = gen5_irq_disable; | 2163 | engine->irq_disable = gen5_irq_disable; |
| @@ -2162,11 +2229,18 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) | |||
| 2162 | 2229 | ||
| 2163 | engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; | 2230 | engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; |
| 2164 | 2231 | ||
| 2165 | if (INTEL_GEN(dev_priv) >= 6) { | 2232 | if (INTEL_GEN(dev_priv) >= 7) { |
| 2166 | engine->init_context = intel_rcs_ctx_init; | 2233 | engine->init_context = intel_rcs_ctx_init; |
| 2167 | engine->emit_flush = gen7_render_ring_flush; | 2234 | engine->emit_flush = gen7_render_ring_flush; |
| 2168 | if (IS_GEN(dev_priv, 6)) | 2235 | engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb; |
| 2169 | engine->emit_flush = gen6_render_ring_flush; | 2236 | engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz; |
| 2237 | engine->irq_seqno_barrier = gen6_seqno_barrier; | ||
| 2238 | } else if (IS_GEN(dev_priv, 6)) { | ||
| 2239 | engine->init_context = intel_rcs_ctx_init; | ||
| 2240 | engine->emit_flush = gen6_render_ring_flush; | ||
| 2241 | engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb; | ||
| 2242 | engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz; | ||
| 2243 | engine->irq_seqno_barrier = gen6_seqno_barrier; | ||
| 2170 | } else if (IS_GEN(dev_priv, 5)) { | 2244 | } else if (IS_GEN(dev_priv, 5)) { |
| 2171 | engine->emit_flush = gen4_render_ring_flush; | 2245 | engine->emit_flush = gen4_render_ring_flush; |
| 2172 | } else { | 2246 | } else { |
| @@ -2201,6 +2275,10 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) | |||
| 2201 | engine->set_default_submission = gen6_bsd_set_default_submission; | 2275 | engine->set_default_submission = gen6_bsd_set_default_submission; |
| 2202 | engine->emit_flush = gen6_bsd_ring_flush; | 2276 | engine->emit_flush = gen6_bsd_ring_flush; |
| 2203 | engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; | 2277 | engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; |
| 2278 | |||
| 2279 | engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; | ||
| 2280 | engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; | ||
| 2281 | engine->irq_seqno_barrier = gen6_seqno_barrier; | ||
| 2204 | } else { | 2282 | } else { |
| 2205 | engine->emit_flush = bsd_ring_flush; | 2283 | engine->emit_flush = bsd_ring_flush; |
| 2206 | if (IS_GEN(dev_priv, 5)) | 2284 | if (IS_GEN(dev_priv, 5)) |
| @@ -2216,11 +2294,17 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) | |||
| 2216 | { | 2294 | { |
| 2217 | struct drm_i915_private *dev_priv = engine->i915; | 2295 | struct drm_i915_private *dev_priv = engine->i915; |
| 2218 | 2296 | ||
| 2297 | GEM_BUG_ON(INTEL_GEN(dev_priv) < 6); | ||
| 2298 | |||
| 2219 | intel_ring_default_vfuncs(dev_priv, engine); | 2299 | intel_ring_default_vfuncs(dev_priv, engine); |
| 2220 | 2300 | ||
| 2221 | engine->emit_flush = gen6_ring_flush; | 2301 | engine->emit_flush = gen6_ring_flush; |
| 2222 | engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; | 2302 | engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; |
| 2223 | 2303 | ||
| 2304 | engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; | ||
| 2305 | engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; | ||
| 2306 | engine->irq_seqno_barrier = gen6_seqno_barrier; | ||
| 2307 | |||
| 2224 | return intel_init_ring_buffer(engine); | 2308 | return intel_init_ring_buffer(engine); |
| 2225 | } | 2309 | } |
| 2226 | 2310 | ||
| @@ -2228,6 +2312,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) | |||
| 2228 | { | 2312 | { |
| 2229 | struct drm_i915_private *dev_priv = engine->i915; | 2313 | struct drm_i915_private *dev_priv = engine->i915; |
| 2230 | 2314 | ||
| 2315 | GEM_BUG_ON(INTEL_GEN(dev_priv) < 7); | ||
| 2316 | |||
| 2231 | intel_ring_default_vfuncs(dev_priv, engine); | 2317 | intel_ring_default_vfuncs(dev_priv, engine); |
| 2232 | 2318 | ||
| 2233 | engine->emit_flush = gen6_ring_flush; | 2319 | engine->emit_flush = gen6_ring_flush; |
| @@ -2235,5 +2321,9 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) | |||
| 2235 | engine->irq_enable = hsw_vebox_irq_enable; | 2321 | engine->irq_enable = hsw_vebox_irq_enable; |
| 2236 | engine->irq_disable = hsw_vebox_irq_disable; | 2322 | engine->irq_disable = hsw_vebox_irq_disable; |
| 2237 | 2323 | ||
| 2324 | engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; | ||
| 2325 | engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; | ||
| 2326 | engine->irq_seqno_barrier = gen6_seqno_barrier; | ||
| 2327 | |||
| 2238 | return intel_init_ring_buffer(engine); | 2328 | return intel_init_ring_buffer(engine); |
| 2239 | } | 2329 | } |
