path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
author	Chris Wilson <chris@chris-wilson.co.uk>	2018-12-28 12:16:39 -0500
committer	Chris Wilson <chris@chris-wilson.co.uk>	2018-12-31 10:35:45 -0500
commit	1212bd821de8fd7d63003265bb3fe5711ebbc3f7 (patch)
tree	a94af6f6023a5d250cf9ae487aa301729ea9d27c /drivers/gpu/drm/i915/intel_ringbuffer.c
parent	d9cad2206abf4f2986c2f74ae5cc4e2415232091 (diff)
drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
The irq_seqno_barrier is a tradeoff between doing work on every request (on the GPU) and doing work after every interrupt (on the CPU). We presume we have many more requests than interrupts! However, the current w/a for Ivybridge is an implicit delay that fails sporadically, and fails consistently if we move the w/a into the irq handler itself. This makes the CPU barrier untenable for upcoming interrupt handler changes, so we need to replace it with a delay on the GPU before we send the MI_USER_INTERRUPT. As it turns out, that delay is 32x MI_STORE_DWORD_IMM, or about 0.6us per request! Quite nasty, but the lesser of two evils looking to the future.

Testcase: igt/gem_sync
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181228171641.16531-4-chris@chris-wilson.co.uk
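As a reader's cross-check of the arithmetic above (a stand-alone sketch, not part of the patch): the new gen7 breadcrumb in the first hunk below emits 3 dwords for the MI_FLUSH_DW seqno store, 32 * 3 dwords for the workaround stores, 3 dwords for the trailing MI_FLUSH_DW, and 2 for MI_USER_INTERRUPT plus MI_NOOP, which matches the declared gen7_xcs_emit_breadcrumb_sz of 8 + GEN7_XCS_WA * 3.

	#include <assert.h>

	int main(void)
	{
		enum { GEN7_XCS_WA = 32 };	/* matches the #define in the hunk below */

		/* dwords written by gen7_xcs_emit_breadcrumb() */
		int emitted = 3			/* MI_FLUSH_DW + hws address + seqno */
			    + GEN7_XCS_WA * 3	/* MI_STORE_DWORD_INDEX triplets (the w/a) */
			    + 3			/* trailing MI_FLUSH_DW + two zero dwords */
			    + 2;		/* MI_USER_INTERRUPT + MI_NOOP */

		/* must agree with gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3 */
		assert(emitted == 8 + GEN7_XCS_WA * 3);	/* both sides are 104 dwords */
		return 0;
	}

At the quoted ~0.6us per request, the 32 workaround stores work out to roughly 19ns apiece; that is the GPU-side price paid for dropping the CPU-side ACTHD read from the interrupt path.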
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.c	80
1 file changed, 44 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2fb3a364c390..dd996103d495 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -443,6 +443,34 @@ static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 }
 static const int gen6_xcs_emit_breadcrumb_sz = 4;
 
+#define GEN7_XCS_WA 32
+static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	int i;
+
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
+	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
+	*cs++ = rq->global_seqno;
+
+	for (i = 0; i < GEN7_XCS_WA; i++) {
+		*cs++ = MI_STORE_DWORD_INDEX;
+		*cs++ = I915_GEM_HWS_INDEX_ADDR;
+		*cs++ = rq->global_seqno;
+	}
+
+	*cs++ = MI_FLUSH_DW;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+}
+static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;
+#undef GEN7_XCS_WA
+
 static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
 {
 	/*
@@ -875,31 +903,6 @@ gen5_seqno_barrier(struct intel_engine_cs *engine)
 }
 
 static void
-gen6_seqno_barrier(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	/* Workaround to force correct ordering between irq and seqno writes on
-	 * ivb (and maybe also on snb) by reading from a CS register (like
-	 * ACTHD) before reading the status page.
-	 *
-	 * Note that this effectively stalls the read by the time it takes to
-	 * do a memory transaction, which more or less ensures that the write
-	 * from the GPU has sufficient time to invalidate the CPU cacheline.
-	 * Alternatively we could delay the interrupt from the CS ring to give
-	 * the write time to land, but that would incur a delay after every
-	 * batch i.e. much more frequent than a delay when waiting for the
-	 * interrupt (with the same net latency).
-	 *
-	 * Also note that to prevent whole machine hangs on gen7, we have to
-	 * take the spinlock to guard against concurrent cacheline access.
-	 */
-	spin_lock_irq(&dev_priv->uncore.lock);
-	POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
-	spin_unlock_irq(&dev_priv->uncore.lock);
-}
-
-static void
 gen5_irq_enable(struct intel_engine_cs *engine)
 {
 	gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
@@ -2258,10 +2261,13 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
 		engine->emit_flush = gen6_bsd_ring_flush;
 		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 
-		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
-		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
-		if (!IS_GEN(dev_priv, 6))
-			engine->irq_seqno_barrier = gen6_seqno_barrier;
+		if (IS_GEN(dev_priv, 6)) {
+			engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+			engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
+		} else {
+			engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
+			engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
+		}
 	} else {
 		engine->emit_flush = bsd_ring_flush;
 		if (IS_GEN(dev_priv, 5))
@@ -2284,10 +2290,13 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
 	engine->emit_flush = gen6_ring_flush;
 	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 
-	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
-	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
-	if (!IS_GEN(dev_priv, 6))
-		engine->irq_seqno_barrier = gen6_seqno_barrier;
+	if (IS_GEN(dev_priv, 6)) {
+		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
+	} else {
+		engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
+		engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
+	}
 
 	return intel_init_ring_buffer(engine);
 }
@@ -2305,9 +2314,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
 	engine->irq_enable = hsw_vebox_irq_enable;
 	engine->irq_disable = hsw_vebox_irq_disable;
 
-	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
-	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
-	engine->irq_seqno_barrier = gen6_seqno_barrier;
+	engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
+	engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
 
 	return intel_init_ring_buffer(engine);
 }
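For scale, a similar back-of-envelope sketch (again a reader's note, not part of the patch) comparing the ring space taken by the two breadcrumbs: gen6_xcs_emit_breadcrumb_sz is 4 dwords, while the gen7 variant is 8 + 32 * 3 = 104 dwords, i.e. 16 bytes versus 416 bytes of commands per request, assuming the usual 4 bytes per dword.

	#include <stdio.h>

	int main(void)
	{
		enum { GEN7_XCS_WA = 32 };
		const int gen6_sz = 4;				/* gen6_xcs_emit_breadcrumb_sz */
		const int gen7_sz = 8 + GEN7_XCS_WA * 3;	/* gen7_xcs_emit_breadcrumb_sz */

		/* each emitted dword occupies 4 bytes of ring space */
		printf("gen6 breadcrumb: %3d dwords, %3d bytes\n", gen6_sz, gen6_sz * 4);
		printf("gen7 breadcrumb: %3d dwords, %3d bytes\n", gen7_sz, gen7_sz * 4);
		return 0;
	}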