From 0cb26a8ed1a61a31a79bb9fd97687fb0a76ce221 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Jun 2016 14:55:53 +0100 Subject: drm/i915: Move legacy kernel context pinning to intel_ringbuffer.c This is so that we have symmetry with intel_lrc.c and avoid a source of if (i915.enable_execlists) layering violation within i915_gem_context.c - that is we move the specific handling of the dev_priv->kernel_context for legacy submission into the legacy submission code. This depends upon the init/fini ordering between contexts and engines already defined by intel_lrc.c, and also exporting the context alignment required for pinning the legacy context. v2: Separate out pin/unpin context funcs for greater symmetry with intel_lrc. One more step towards unifying behaviour between the two classes of engines and towards fixing another bug in i915_switch_context vs requests. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1466776558-21516-2-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 55 +++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index fedd27049814..86b80b4727fb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2321,6 +2321,47 @@ intel_ringbuffer_free(struct intel_ringbuffer *ring) kfree(ring); } +static int intel_ring_context_pin(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct intel_context *ce = &ctx->engine[engine->id]; + int ret; + + lockdep_assert_held(&ctx->i915->dev->struct_mutex); + + if (ce->pin_count++) + return 0; + + if (ce->state) { + ret = i915_gem_obj_ggtt_pin(ce->state, ctx->ggtt_alignment, 0); + if (ret) + goto error; + } + + i915_gem_context_reference(ctx); + return 0; + +error: + ce->pin_count = 0; + return ret; +} + +static void intel_ring_context_unpin(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct intel_context *ce = &ctx->engine[engine->id]; + + lockdep_assert_held(&ctx->i915->dev->struct_mutex); + + if (--ce->pin_count) + return; + + if (ce->state) + i915_gem_object_ggtt_unpin(ce->state); + + i915_gem_context_unreference(ctx); +} + static int intel_init_ring_buffer(struct drm_device *dev, struct intel_engine_cs *engine) { @@ -2341,6 +2382,17 @@ static int intel_init_ring_buffer(struct drm_device *dev, init_waitqueue_head(&engine->irq_queue); + /* We may need to do things with the shrinker which + * require us to immediately switch back to the default + * context. This can cause a problem as pinning the + * default context also requires GTT space which may not + * be available. To avoid this we always pin the default + * context. 
+ */ + ret = intel_ring_context_pin(dev_priv->kernel_context, engine); + if (ret) + goto error; + ringbuf = intel_engine_create_ringbuffer(engine, 32 * PAGE_SIZE); if (IS_ERR(ringbuf)) { ret = PTR_ERR(ringbuf); @@ -2408,6 +2460,9 @@ void intel_cleanup_engine(struct intel_engine_cs *engine) i915_cmd_parser_fini_ring(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + + intel_ring_context_unpin(dev_priv->kernel_context, engine); + engine->i915 = NULL; } -- cgit v1.2.2 From c7c3c07d16dd51faddeb6ae665d360be030b31b0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Jun 2016 14:55:54 +0100 Subject: drm/i915: Treat kernel context as initialised The kernel context exists simply as a placeholder and should never be executed with a render context. It does not need the golden render state, as that will always be applied to a user context. By skipping the initialisation we can avoid issues in attempting to program the golden render context when trying to make the hardware idle. v2: Rebase Testcase: igt/drm_module_reload_basic #byt Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95634 Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1466776558-21516-3-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 86b80b4727fb..04a2d141e690 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2338,6 +2338,16 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx, goto error; } + /* The kernel context is only used as a placeholder for flushing the + * active context. It is never used for submitting user rendering and + * as such never requires the golden render context, and so we can skip + * emitting it when we switch to the kernel context. This is required + * as during eviction we cannot allocate and pin the renderstate in + * order to initialise the context. 
+ */ + if (ctx == ctx->i915->kernel_context) + ce->initialised = true; + i915_gem_context_reference(ctx); return 0; -- cgit v1.2.2 From 25ab57f42f96f7e66af29546bcc67c9057277e84 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Jun 2016 15:33:29 +0100 Subject: drm/i915: Convert wait_for(I915_READ(reg)) to intel_wait_for_register() By using the out-of-line intel_wait_for_register() not only do we gain efficiency from using the hybrid wait_for() contained within, but we avoid code bloat from the numerous inlined loops, in total (all patches): text data bss dec hex filename 1078551 4557 416 1083524 108884 drivers/gpu/drm/i915/i915.ko 1070775 4557 416 1075748 106a24 drivers/gpu/drm/i915/i915.ko Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1467297225-21379-46-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 04a2d141e690..7ef3e68c8fd6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -515,8 +515,9 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine) I915_WRITE(reg, _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | INSTPM_SYNC_FLUSH)); - if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0, - 1000)) + if (intel_wait_for_register(dev_priv, + reg, INSTPM_SYNC_FLUSH, 0, + 1000)) DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n", engine->name); } -- cgit v1.2.2 From 3d808eb1711f2431fe6ab9f912b2ff795bb74013 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Jun 2016 15:33:30 +0100 Subject: drm/i915: Convert wait_for(I915_READ(reg)) to intel_wait_for_register() By using the out-of-line intel_wait_for_register() not only do we gain efficiency from using the hybrid wait_for() contained within, but we avoid code bloat from the numerous inlined loops, in total (all patches): text data bss dec hex filename 1078551 4557 416 1083524 108884 drivers/gpu/drm/i915/i915.ko 1070775 4557 416 1075748 106a24 drivers/gpu/drm/i915/i915.ko Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1467297225-21379-47-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7ef3e68c8fd6..05018243aaef 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -529,7 +529,11 @@ static bool stop_ring(struct intel_engine_cs *engine) if (!IS_GEN2(dev_priv)) { I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING)); - if (wait_for((I915_READ_MODE(engine) & MODE_IDLE) != 0, 1000)) { + if (intel_wait_for_register(dev_priv, + RING_MI_MODE(engine->mmio_base), + MODE_IDLE, + MODE_IDLE, + 1000)) { DRM_ERROR("%s : timed out trying to stop ring\n", engine->name); /* Sometimes we observe that the idle flag is not -- cgit v1.2.2 From d54fe4aad783ef68ffbd5e1bb7b4a33669b1b257 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Jun 2016 15:33:31 +0100 Subject: drm/i915: Convert wait_for(I915_READ(reg)) to intel_wait_for_register() By using the out-of-line intel_wait_for_register() not only do we
gain efficiency from using the hybrid wait_for() contained within, but we avoid code bloat from the numerous inlined loops, in total (all patches): text data bss dec hex filename 1078551 4557 416 1083524 108884 drivers/gpu/drm/i915/i915.ko 1070775 4557 416 1075748 106a24 drivers/gpu/drm/i915/i915.ko Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1467297225-21379-48-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 05018243aaef..c4365cc1f133 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2696,9 +2696,11 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, I915_WRITE64(GEN6_BSD_RNCID, 0x0); /* Wait for the ring not to be idle, i.e. for it to wake up. */ - if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) & - GEN6_BSD_SLEEP_INDICATOR) == 0, - 50)) + if (intel_wait_for_register(dev_priv, + GEN6_BSD_SLEEP_PSMI_CONTROL, + GEN6_BSD_SLEEP_INDICATOR, + 0, + 50)) DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); /* Now that the ring is fully powered up, update the tail */ -- cgit v1.2.2 From 76f8421f2a67cdecd616705b7a0c3750d6d42c05 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Jun 2016 15:33:45 +0100 Subject: drm/i915: Perform Sandybridge BSD tail write under the forcewake Since we have a sequence of register reads and writes, we can reduce the latency of starting the BSD ring by performing all the mmio operations under the same forcewake wakeref. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1467297225-21379-62-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c4365cc1f133..7c93d4c210e5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2684,34 +2684,38 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, { struct drm_i915_private *dev_priv = engine->i915; + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + /* Every tail move must follow the sequence below */ /* Disable notification that the ring is IDLE. The GT * will then assume that it is busy and bring it out of rc6. */ - I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL, - _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); + I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL, + _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); /* Clear the context id. Here be magic! */ - I915_WRITE64(GEN6_BSD_RNCID, 0x0); + I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0); /* Wait for the ring not to be idle, i.e. for it to wake up. 
*/ - if (intel_wait_for_register(dev_priv, - GEN6_BSD_SLEEP_PSMI_CONTROL, - GEN6_BSD_SLEEP_INDICATOR, - 0, - 50)) + if (intel_wait_for_register_fw(dev_priv, + GEN6_BSD_SLEEP_PSMI_CONTROL, + GEN6_BSD_SLEEP_INDICATOR, + 0, + 50)) DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); /* Now that the ring is fully powered up, update the tail */ - I915_WRITE_TAIL(engine, value); - POSTING_READ(RING_TAIL(engine->mmio_base)); + I915_WRITE_FW(RING_TAIL(engine->mmio_base), value); + POSTING_READ_FW(RING_TAIL(engine->mmio_base)); /* Let the ring send IDLE messages to the GT again, * and so let it sleep to conserve power when idle. */ - I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL, - _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); + I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL, + _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, -- cgit v1.2.2 From 06a2fe22796d9497a35778acc0becb073b242e35 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Jun 2016 16:09:20 +0100 Subject: drm/i915: Consolidate write_tail vfunc initializer Introduce a function which initializes vfuncs mostly common across engines and move write_tail initialization in it since only one engine overrides the default. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7c93d4c210e5..f2f78618abb4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2884,6 +2884,12 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, return 0; } +static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, + struct intel_engine_cs *engine) +{ + engine->write_tail = ring_write_tail; +} + int intel_init_render_ring_buffer(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -2897,6 +2903,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->hw_id = 0; engine->mmio_base = RENDER_RING_BASE; + intel_ring_default_vfuncs(dev_priv, engine); + if (INTEL_GEN(dev_priv) >= 8) { if (i915_semaphore_is_enabled(dev_priv)) { obj = i915_gem_object_create(dev, 4096); @@ -2988,7 +2996,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) } engine->irq_enable_mask = I915_USER_INTERRUPT; } - engine->write_tail = ring_write_tail; if (IS_HASWELL(dev_priv)) engine->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; @@ -3047,7 +3054,8 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) engine->exec_id = I915_EXEC_BSD; engine->hw_id = 1; - engine->write_tail = ring_write_tail; + intel_ring_default_vfuncs(dev_priv, engine); + if (INTEL_GEN(dev_priv) >= 6) { engine->mmio_base = GEN6_BSD_RING_BASE; /* gen6 bsd needs a special wa for tail updates */ @@ -3125,9 +3133,10 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) engine->id = VCS2; engine->exec_id = I915_EXEC_BSD; engine->hw_id = 4; - - engine->write_tail = ring_write_tail; engine->mmio_base = GEN8_BSD2_RING_BASE; + + intel_ring_default_vfuncs(dev_priv, engine); + engine->flush = gen6_bsd_ring_flush; engine->add_request = gen6_add_request; engine->irq_seqno_barrier = gen6_seqno_barrier; @@ -3158,9 +3167,10 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) engine->id = 
BCS; engine->exec_id = I915_EXEC_BLT; engine->hw_id = 2; - engine->mmio_base = BLT_RING_BASE; - engine->write_tail = ring_write_tail; + + intel_ring_default_vfuncs(dev_priv, engine); + engine->flush = gen6_ring_flush; engine->add_request = gen6_add_request; engine->irq_seqno_barrier = gen6_seqno_barrier; @@ -3218,9 +3228,10 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) engine->id = VECS; engine->exec_id = I915_EXEC_VEBOX; engine->hw_id = 3; - engine->mmio_base = VEBOX_RING_BASE; - engine->write_tail = ring_write_tail; + + intel_ring_default_vfuncs(dev_priv, engine); + engine->flush = gen6_ring_flush; engine->add_request = gen6_add_request; engine->irq_seqno_barrier = gen6_seqno_barrier; -- cgit v1.2.2 From 7445a2a41362a570f25354ab1f9b6c23d13b30d3 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Jun 2016 16:09:21 +0100 Subject: drm/i915: Consolidate add_request vfunc All engines apart from render select this based on Gen. Move it to the common helper as well. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f2f78618abb4..0e9ab324e35d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2888,6 +2888,11 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { engine->write_tail = ring_write_tail; + + if (INTEL_GEN(dev_priv) >= 6) + engine->add_request = gen6_add_request; + else + engine->add_request = i9xx_add_request; } int intel_init_render_ring_buffer(struct drm_device *dev) @@ -2939,7 +2944,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) } } else if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; - engine->add_request = gen6_add_request; engine->flush = gen7_render_ring_flush; if (IS_GEN6(dev_priv)) engine->flush = gen6_render_ring_flush; @@ -2980,7 +2984,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; } else { - engine->add_request = i9xx_add_request; if (INTEL_GEN(dev_priv) < 4) engine->flush = gen2_render_ring_flush; else @@ -3062,7 +3065,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) if (IS_GEN6(dev_priv)) engine->write_tail = gen6_bsd_ring_write_tail; engine->flush = gen6_bsd_ring_flush; - engine->add_request = gen6_add_request; engine->irq_seqno_barrier = gen6_seqno_barrier; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; @@ -3102,7 +3104,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) } else { engine->mmio_base = BSD_RING_BASE; engine->flush = bsd_ring_flush; - engine->add_request = i9xx_add_request; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; if (IS_GEN5(dev_priv)) { @@ -3138,7 +3139,6 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); engine->flush = gen6_bsd_ring_flush; - engine->add_request = gen6_add_request; engine->irq_seqno_barrier = gen6_seqno_barrier; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; @@ -3172,7 +3172,6 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); engine->flush = gen6_ring_flush; - engine->add_request = 
gen6_add_request; engine->irq_seqno_barrier = gen6_seqno_barrier; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; @@ -3233,7 +3232,6 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); engine->flush = gen6_ring_flush; - engine->add_request = gen6_add_request; engine->irq_seqno_barrier = gen6_seqno_barrier; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; -- cgit v1.2.2 From cc54a828300869528cc8993b4897389fa542cb00 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Jun 2016 16:09:22 +0100 Subject: drm/i915: Consolidate seqno_barrier vfunc Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0e9ab324e35d..98a5ac4aaaa9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2889,10 +2889,12 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, { engine->write_tail = ring_write_tail; - if (INTEL_GEN(dev_priv) >= 6) + if (INTEL_GEN(dev_priv) >= 6) { engine->add_request = gen6_add_request; - else + engine->irq_seqno_barrier = gen6_seqno_barrier; + } else { engine->add_request = i9xx_add_request; + } } int intel_init_render_ring_buffer(struct drm_device *dev) @@ -2950,7 +2952,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->irq_get = gen6_ring_get_irq; engine->irq_put = gen6_ring_put_irq; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; - engine->irq_seqno_barrier = gen6_seqno_barrier; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; if (i915_semaphore_is_enabled(dev_priv)) { @@ -3065,7 +3066,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) if (IS_GEN6(dev_priv)) engine->write_tail = gen6_bsd_ring_write_tail; engine->flush = gen6_bsd_ring_flush; - engine->irq_seqno_barrier = gen6_seqno_barrier; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; if (INTEL_GEN(dev_priv) >= 8) { @@ -3139,7 +3139,6 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); engine->flush = gen6_bsd_ring_flush; - engine->irq_seqno_barrier = gen6_seqno_barrier; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; engine->irq_enable_mask = @@ -3172,7 +3171,6 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); engine->flush = gen6_ring_flush; - engine->irq_seqno_barrier = gen6_seqno_barrier; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; if (INTEL_GEN(dev_priv) >= 8) { @@ -3232,7 +3230,6 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); engine->flush = gen6_ring_flush; - engine->irq_seqno_barrier = gen6_seqno_barrier; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; -- cgit v1.2.2 From b9700325cb0203d3cbe44d25169d7aa195c1f28d Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Jun 2016 16:09:23 +0100 Subject: drm/i915: Consolidate get and put irq vfuncs v2: Consistent INTEL_GEN vs IS_GEN usage. 
(Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 46 ++++++++++++--------------------- 1 file changed, 17 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 98a5ac4aaaa9..72fdf2fc5ff4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2895,6 +2895,23 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, } else { engine->add_request = i9xx_add_request; } + + if (INTEL_GEN(dev_priv) >= 8) { + engine->irq_get = gen8_ring_get_irq; + engine->irq_put = gen8_ring_put_irq; + } else if (INTEL_GEN(dev_priv) >= 6) { + engine->irq_get = gen6_ring_get_irq; + engine->irq_put = gen6_ring_put_irq; + } else if (INTEL_GEN(dev_priv) >= 5) { + engine->irq_get = gen5_ring_get_irq; + engine->irq_put = gen5_ring_put_irq; + } else if (INTEL_GEN(dev_priv) >= 3) { + engine->irq_get = i9xx_ring_get_irq; + engine->irq_put = i9xx_ring_put_irq; + } else { + engine->irq_get = i8xx_ring_get_irq; + engine->irq_put = i8xx_ring_put_irq; + } } int intel_init_render_ring_buffer(struct drm_device *dev) @@ -2933,8 +2950,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->init_context = intel_rcs_ctx_init; engine->add_request = gen8_render_add_request; engine->flush = gen8_render_ring_flush; - engine->irq_get = gen8_ring_get_irq; - engine->irq_put = gen8_ring_put_irq; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; @@ -2949,8 +2964,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->flush = gen7_render_ring_flush; if (IS_GEN6(dev_priv)) engine->flush = gen6_render_ring_flush; - engine->irq_get = gen6_ring_get_irq; - engine->irq_put = gen6_ring_put_irq; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; @@ -2980,8 +2993,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->flush = gen4_render_ring_flush; engine->get_seqno = pc_render_get_seqno; engine->set_seqno = pc_render_set_seqno; - engine->irq_get = gen5_ring_get_irq; - engine->irq_put = gen5_ring_put_irq; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; } else { @@ -2991,13 +3002,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->flush = gen4_render_ring_flush; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; - if (IS_GEN2(dev_priv)) { - engine->irq_get = i8xx_ring_get_irq; - engine->irq_put = i8xx_ring_put_irq; - } else { - engine->irq_get = i9xx_ring_get_irq; - engine->irq_put = i9xx_ring_put_irq; - } engine->irq_enable_mask = I915_USER_INTERRUPT; } @@ -3071,8 +3075,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; - engine->irq_get = gen8_ring_get_irq; - engine->irq_put = gen8_ring_put_irq; engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev_priv)) { @@ -3082,8 +3084,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) } } else { engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; - engine->irq_get = gen6_ring_get_irq; - engine->irq_put = gen6_ring_put_irq; engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev_priv)) 
{ @@ -3108,12 +3108,8 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) engine->set_seqno = ring_set_seqno; if (IS_GEN5(dev_priv)) { engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; - engine->irq_get = gen5_ring_get_irq; - engine->irq_put = gen5_ring_put_irq; } else { engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; - engine->irq_get = i9xx_ring_get_irq; - engine->irq_put = i9xx_ring_put_irq; } engine->dispatch_execbuffer = i965_dispatch_execbuffer; } @@ -3143,8 +3139,6 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) engine->set_seqno = ring_set_seqno; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; - engine->irq_get = gen8_ring_get_irq; - engine->irq_put = gen8_ring_put_irq; engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev_priv)) { @@ -3176,8 +3170,6 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; - engine->irq_get = gen8_ring_get_irq; - engine->irq_put = gen8_ring_put_irq; engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev_priv)) { engine->semaphore.sync_to = gen8_ring_sync; @@ -3186,8 +3178,6 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) } } else { engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - engine->irq_get = gen6_ring_get_irq; - engine->irq_put = gen6_ring_put_irq; engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev_priv)) { engine->semaphore.signal = gen6_signal; @@ -3236,8 +3226,6 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; - engine->irq_get = gen8_ring_get_irq; - engine->irq_put = gen8_ring_put_irq; engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev_priv)) { engine->semaphore.sync_to = gen8_ring_sync; -- cgit v1.2.2 From 604096d7785bc4961dab928023ea61bcd09d59c0 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Jun 2016 16:09:24 +0100 Subject: drm/i915: Consolidate get/set_seqno Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 72fdf2fc5ff4..5c7d2659af1b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2888,6 +2888,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { engine->write_tail = ring_write_tail; + engine->get_seqno = ring_get_seqno; + engine->set_seqno = ring_set_seqno; if (INTEL_GEN(dev_priv) >= 6) { engine->add_request = gen6_add_request; @@ -2951,8 +2953,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->add_request = gen8_render_add_request; engine->flush = gen8_render_ring_flush; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; - engine->get_seqno = ring_get_seqno; - engine->set_seqno = ring_set_seqno; if (i915_semaphore_is_enabled(dev_priv)) { WARN_ON(!dev_priv->semaphore_obj); engine->semaphore.sync_to = gen8_ring_sync; @@ -2965,8 +2965,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) if (IS_GEN6(dev_priv)) engine->flush = gen6_render_ring_flush; 
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; - engine->get_seqno = ring_get_seqno; - engine->set_seqno = ring_set_seqno; if (i915_semaphore_is_enabled(dev_priv)) { engine->semaphore.sync_to = gen6_ring_sync; engine->semaphore.signal = gen6_signal; @@ -3000,8 +2998,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->flush = gen2_render_ring_flush; else engine->flush = gen4_render_ring_flush; - engine->get_seqno = ring_get_seqno; - engine->set_seqno = ring_set_seqno; engine->irq_enable_mask = I915_USER_INTERRUPT; } @@ -3070,8 +3066,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) if (IS_GEN6(dev_priv)) engine->write_tail = gen6_bsd_ring_write_tail; engine->flush = gen6_bsd_ring_flush; - engine->get_seqno = ring_get_seqno; - engine->set_seqno = ring_set_seqno; if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; @@ -3104,8 +3098,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) } else { engine->mmio_base = BSD_RING_BASE; engine->flush = bsd_ring_flush; - engine->get_seqno = ring_get_seqno; - engine->set_seqno = ring_set_seqno; if (IS_GEN5(dev_priv)) { engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; } else { @@ -3135,8 +3127,6 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); engine->flush = gen6_bsd_ring_flush; - engine->get_seqno = ring_get_seqno; - engine->set_seqno = ring_set_seqno; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; engine->dispatch_execbuffer = @@ -3165,8 +3155,6 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); engine->flush = gen6_ring_flush; - engine->get_seqno = ring_get_seqno; - engine->set_seqno = ring_set_seqno; if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; @@ -3220,8 +3208,6 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); engine->flush = gen6_ring_flush; - engine->get_seqno = ring_get_seqno; - engine->set_seqno = ring_set_seqno; if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable_mask = -- cgit v1.2.2 From 1d8a1337f3c8346447cbf1db6af468df6cf58c76 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Jun 2016 16:09:25 +0100 Subject: drm/i915: Consolidate init_hw vfunc Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5c7d2659af1b..100e26c1d765 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2887,6 +2887,7 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { + engine->init_hw = init_ring_common; engine->write_tail = ring_write_tail; engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; @@ -3105,7 +3106,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) } engine->dispatch_execbuffer = i965_dispatch_execbuffer; } - engine->init_hw = init_ring_common; return intel_init_ring_buffer(dev, engine); } @@ -3136,7 +3136,6 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) engine->semaphore.signal = gen8_xcs_signal; GEN8_RING_SEMAPHORE_INIT(engine); } - 
engine->init_hw = init_ring_common; return intel_init_ring_buffer(dev, engine); } @@ -3189,7 +3188,6 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; } } - engine->init_hw = init_ring_common; return intel_init_ring_buffer(dev, engine); } @@ -3238,7 +3236,6 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; } } - engine->init_hw = init_ring_common; return intel_init_ring_buffer(dev, engine); } -- cgit v1.2.2 From 960ecaad7579a38326bcd071d1be8c085d888ddf Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Jun 2016 17:40:26 +0100 Subject: drm/i915: Consolidate dispatch_execbuffer vfunc v2: Put dispatch_execbuffer before add_request. (Chris Wilson) v3: Fix add_request and irq_seqno_barrier for gen8+. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 100e26c1d765..2b4bf23b01fb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2892,10 +2892,16 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; - if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(dev_priv) >= 8) { + engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + engine->add_request = gen6_add_request; + engine->irq_seqno_barrier = gen6_seqno_barrier; + } else if (INTEL_GEN(dev_priv) >= 6) { + engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; engine->add_request = gen6_add_request; engine->irq_seqno_barrier = gen6_seqno_barrier; } else { + engine->dispatch_execbuffer = i965_dispatch_execbuffer; engine->add_request = i9xx_add_request; } @@ -3004,15 +3010,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev) if (IS_HASWELL(dev_priv)) engine->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; - else if (IS_GEN8(dev_priv)) - engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; - else if (INTEL_GEN(dev_priv) >= 6) - engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; - else if (INTEL_GEN(dev_priv) >= 4) - engine->dispatch_execbuffer = i965_dispatch_execbuffer; else if (IS_I830(dev_priv) || IS_845G(dev_priv)) engine->dispatch_execbuffer = i830_dispatch_execbuffer; - else + else if (INTEL_GEN(dev_priv) <= 3) engine->dispatch_execbuffer = i915_dispatch_execbuffer; engine->init_hw = init_render_ring; engine->cleanup = render_ring_cleanup; @@ -3070,8 +3070,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; - engine->dispatch_execbuffer = - gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev_priv)) { engine->semaphore.sync_to = gen8_ring_sync; engine->semaphore.signal = gen8_xcs_signal; @@ -3079,8 +3077,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) } } else { engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; - engine->dispatch_execbuffer = - gen6_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev_priv)) { engine->semaphore.sync_to = gen6_ring_sync; engine->semaphore.signal = gen6_signal; @@ -3104,7 +3100,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) } else { engine->irq_enable_mask = 
I915_BSD_USER_INTERRUPT; } - engine->dispatch_execbuffer = i965_dispatch_execbuffer; } return intel_init_ring_buffer(dev, engine); @@ -3129,8 +3124,6 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) engine->flush = gen6_bsd_ring_flush; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; - engine->dispatch_execbuffer = - gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev_priv)) { engine->semaphore.sync_to = gen8_ring_sync; engine->semaphore.signal = gen8_xcs_signal; @@ -3157,7 +3150,6 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; - engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev_priv)) { engine->semaphore.sync_to = gen8_ring_sync; engine->semaphore.signal = gen8_xcs_signal; @@ -3165,7 +3157,6 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) } } else { engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev_priv)) { engine->semaphore.signal = gen6_signal; engine->semaphore.sync_to = gen6_ring_sync; @@ -3210,7 +3201,6 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; - engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev_priv)) { engine->semaphore.sync_to = gen8_ring_sync; engine->semaphore.signal = gen8_xcs_signal; @@ -3220,7 +3210,6 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; engine->irq_get = hsw_vebox_get_irq; engine->irq_put = hsw_vebox_put_irq; - engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev_priv)) { engine->semaphore.sync_to = gen6_ring_sync; engine->semaphore.signal = gen6_signal; -- cgit v1.2.2 From d9a64610b27af01404cc881850cc4903c51a851d Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Jun 2016 16:09:27 +0100 Subject: drm/i915: Consolidate semaphore vfuncs init Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 48 +++++++++++++-------------------- 1 file changed, 18 insertions(+), 30 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2b4bf23b01fb..827d23f321f8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2884,6 +2884,22 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, return 0; } +static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, + struct intel_engine_cs *engine) +{ + if (!i915_semaphore_is_enabled(dev_priv)) + return; + + if (INTEL_GEN(dev_priv) >= 8) { + engine->semaphore.sync_to = gen8_ring_sync; + engine->semaphore.signal = gen8_xcs_signal; + GEN8_RING_SEMAPHORE_INIT(engine); + } else if (INTEL_GEN(dev_priv) >= 6) { + engine->semaphore.sync_to = gen6_ring_sync; + engine->semaphore.signal = gen6_signal; + } +} + static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { @@ -2921,6 +2937,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->irq_get = i8xx_ring_get_irq; engine->irq_put = i8xx_ring_put_irq; } + + intel_ring_init_semaphores(dev_priv, engine); } int 
intel_init_render_ring_buffer(struct drm_device *dev) @@ -2962,9 +2980,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; if (i915_semaphore_is_enabled(dev_priv)) { WARN_ON(!dev_priv->semaphore_obj); - engine->semaphore.sync_to = gen8_ring_sync; engine->semaphore.signal = gen8_rcs_signal; - GEN8_RING_SEMAPHORE_INIT(engine); } } else if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; @@ -2973,8 +2989,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->flush = gen6_render_ring_flush; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; if (i915_semaphore_is_enabled(dev_priv)) { - engine->semaphore.sync_to = gen6_ring_sync; - engine->semaphore.signal = gen6_signal; /* * The current semaphore is only applied on pre-gen8 * platform. And there is no VCS2 ring on the pre-gen8 @@ -3070,16 +3084,9 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; - if (i915_semaphore_is_enabled(dev_priv)) { - engine->semaphore.sync_to = gen8_ring_sync; - engine->semaphore.signal = gen8_xcs_signal; - GEN8_RING_SEMAPHORE_INIT(engine); - } } else { engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; if (i915_semaphore_is_enabled(dev_priv)) { - engine->semaphore.sync_to = gen6_ring_sync; - engine->semaphore.signal = gen6_signal; engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR; engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB; @@ -3124,11 +3131,6 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) engine->flush = gen6_bsd_ring_flush; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; - if (i915_semaphore_is_enabled(dev_priv)) { - engine->semaphore.sync_to = gen8_ring_sync; - engine->semaphore.signal = gen8_xcs_signal; - GEN8_RING_SEMAPHORE_INIT(engine); - } return intel_init_ring_buffer(dev, engine); } @@ -3150,16 +3152,9 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; - if (i915_semaphore_is_enabled(dev_priv)) { - engine->semaphore.sync_to = gen8_ring_sync; - engine->semaphore.signal = gen8_xcs_signal; - GEN8_RING_SEMAPHORE_INIT(engine); - } } else { engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; if (i915_semaphore_is_enabled(dev_priv)) { - engine->semaphore.signal = gen6_signal; - engine->semaphore.sync_to = gen6_ring_sync; /* * The current semaphore is only applied on pre-gen8 * platform. 
And there is no VCS2 ring on the pre-gen8 @@ -3201,18 +3196,11 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; - if (i915_semaphore_is_enabled(dev_priv)) { - engine->semaphore.sync_to = gen8_ring_sync; - engine->semaphore.signal = gen8_xcs_signal; - GEN8_RING_SEMAPHORE_INIT(engine); - } } else { engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; engine->irq_get = hsw_vebox_get_irq; engine->irq_put = hsw_vebox_put_irq; if (i915_semaphore_is_enabled(dev_priv)) { - engine->semaphore.sync_to = gen6_ring_sync; - engine->semaphore.signal = gen6_signal; engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER; engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV; engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB; -- cgit v1.2.2 From db3d4019abaa3a52e66a86fc938a8315f8c4200a Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Jun 2016 16:09:28 +0100 Subject: drm/i915: Move semaphore object creation into intel_ring_init_semaphores The object needs to be created before semaphores can be initialized on any ring and it makes sense to pull it out to this semaphore dedicated helper. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 45 ++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 827d23f321f8..0af8bb1c0a68 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2887,6 +2887,30 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { + struct drm_i915_gem_object *obj; + int ret; + + if (!i915_semaphore_is_enabled(dev_priv)) + return; + + if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) { + obj = i915_gem_object_create(dev_priv->dev, 4096); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n"); + i915.semaphores = 0; + } else { + i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK); + if (ret != 0) { + drm_gem_object_unreference(&obj->base); + DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n"); + i915.semaphores = 0; + } else { + dev_priv->semaphore_obj = obj; + } + } + } + if (!i915_semaphore_is_enabled(dev_priv)) return; @@ -2957,31 +2981,12 @@ int intel_init_render_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); if (INTEL_GEN(dev_priv) >= 8) { - if (i915_semaphore_is_enabled(dev_priv)) { - obj = i915_gem_object_create(dev, 4096); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n"); - i915.semaphores = 0; - } else { - i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK); - if (ret != 0) { - drm_gem_object_unreference(&obj->base); - DRM_ERROR("Failed to pin semaphore bo. 
Disabling semaphores\n"); - i915.semaphores = 0; - } else - dev_priv->semaphore_obj = obj; - } - } - engine->init_context = intel_rcs_ctx_init; engine->add_request = gen8_render_add_request; engine->flush = gen8_render_ring_flush; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; - if (i915_semaphore_is_enabled(dev_priv)) { - WARN_ON(!dev_priv->semaphore_obj); + if (i915_semaphore_is_enabled(dev_priv)) engine->semaphore.signal = gen8_rcs_signal; - } } else if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; engine->flush = gen7_render_ring_flush; -- cgit v1.2.2 From 1b9e665064c6fad91f4265743555406f5804dbcb Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Jun 2016 16:09:29 +0100 Subject: drm/i915: Compact Gen8 semaphore initialization Replace the macro initializer with a programatic loop which results in smaller code and hopefully just as clear. v2: Rebase. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0af8bb1c0a68..aef6824b4c75 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2888,7 +2888,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { struct drm_i915_gem_object *obj; - int ret; + int ret, i; if (!i915_semaphore_is_enabled(dev_priv)) return; @@ -2915,9 +2915,21 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, return; if (INTEL_GEN(dev_priv) >= 8) { + u64 offset = i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj); + engine->semaphore.sync_to = gen8_ring_sync; engine->semaphore.signal = gen8_xcs_signal; - GEN8_RING_SEMAPHORE_INIT(engine); + + for (i = 0; i < I915_NUM_ENGINES; i++) { + u64 ring_offset; + + if (i != engine->id) + ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i); + else + ring_offset = MI_SEMAPHORE_SYNC_INVALID; + + engine->semaphore.signal_ggtt[i] = ring_offset; + } } else if (INTEL_GEN(dev_priv) >= 6) { engine->semaphore.sync_to = gen6_ring_sync; engine->semaphore.signal = gen6_signal; -- cgit v1.2.2 From c38c651b39a642d5900e9c8122cfc9bff4343538 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Jun 2016 16:09:30 +0100 Subject: drm/i915: Compact gen8_ring_sync Store the semaphore offset in a temporary variable to avoid having to get the VMA offset twice. 
Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index aef6824b4c75..6d5e861fe6c1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1547,6 +1547,7 @@ gen8_ring_sync(struct drm_i915_gem_request *waiter_req, { struct intel_engine_cs *waiter = waiter_req->engine; struct drm_i915_private *dev_priv = waiter_req->i915; + u64 offset = GEN8_WAIT_OFFSET(waiter, signaller->id); struct i915_hw_ppgtt *ppgtt; int ret; @@ -1558,10 +1559,8 @@ gen8_ring_sync(struct drm_i915_gem_request *waiter_req, MI_SEMAPHORE_GLOBAL_GTT | MI_SEMAPHORE_SAD_GTE_SDD); intel_ring_emit(waiter, seqno); - intel_ring_emit(waiter, - lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id))); - intel_ring_emit(waiter, - upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id))); + intel_ring_emit(waiter, lower_32_bits(offset)); + intel_ring_emit(waiter, upper_32_bits(offset)); intel_ring_advance(waiter); /* When the !RCS engines idle waiting upon a semaphore, they lose their -- cgit v1.2.2 From 4b8e38a9ef490d8e7a4225d05aaf4619eb632d82 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Jun 2016 16:09:31 +0100 Subject: drm/i915: Consolidate legacy semaphore initialization Replace per-engine initialization with a common half-programatic, half-data driven code for ease of maintenance and compactness. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 110 ++++++++++++++------------------ 1 file changed, 48 insertions(+), 62 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 6d5e861fe6c1..2080668d3630 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2932,6 +2932,54 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, } else if (INTEL_GEN(dev_priv) >= 6) { engine->semaphore.sync_to = gen6_ring_sync; engine->semaphore.signal = gen6_signal; + + /* + * The current semaphore is only applied on pre-gen8 + * platform. And there is no VCS2 ring on the pre-gen8 + * platform. So the semaphore between RCS and VCS2 is + * initialized as INVALID. Gen8 will initialize the + * sema between VCS2 and RCS later. 
+ */ + for (i = 0; i < I915_NUM_ENGINES; i++) { + static const struct { + u32 wait_mbox; + i915_reg_t mbox_reg; + } sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = { + [RCS] = { + [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, + [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, + [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, + }, + [VCS] = { + [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, + [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, + [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, + }, + [BCS] = { + [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, + [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, + [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, + }, + [VECS] = { + [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, + [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, + [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, + }, + }; + u32 wait_mbox; + i915_reg_t mbox_reg; + + if (i == engine->id || i == VCS2) { + wait_mbox = MI_SEMAPHORE_SYNC_INVALID; + mbox_reg = GEN6_NOSYNC; + } else { + wait_mbox = sem_data[engine->id][i].wait_mbox; + mbox_reg = sem_data[engine->id][i].mbox_reg; + } + + engine->semaphore.mbox.wait[i] = wait_mbox; + engine->semaphore.mbox.signal[i] = mbox_reg; + } } } @@ -3004,25 +3052,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) if (IS_GEN6(dev_priv)) engine->flush = gen6_render_ring_flush; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; - if (i915_semaphore_is_enabled(dev_priv)) { - /* - * The current semaphore is only applied on pre-gen8 - * platform. And there is no VCS2 ring on the pre-gen8 - * platform. So the semaphore between RCS and VCS2 is - * initialized as INVALID. Gen8 will initialize the - * sema between VCS2 and RCS later. 
- */ - engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; - engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV; - engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB; - engine->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE; - engine->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; - engine->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; - engine->semaphore.mbox.signal[VCS] = GEN6_VRSYNC; - engine->semaphore.mbox.signal[BCS] = GEN6_BRSYNC; - engine->semaphore.mbox.signal[VECS] = GEN6_VERSYNC; - engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; - } } else if (IS_GEN5(dev_priv)) { engine->add_request = pc_render_add_request; engine->flush = gen4_render_ring_flush; @@ -3102,18 +3131,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; } else { engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; - if (i915_semaphore_is_enabled(dev_priv)) { - engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR; - engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; - engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB; - engine->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE; - engine->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; - engine->semaphore.mbox.signal[RCS] = GEN6_RVSYNC; - engine->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; - engine->semaphore.mbox.signal[BCS] = GEN6_BVSYNC; - engine->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC; - engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; - } } } else { engine->mmio_base = BSD_RING_BASE; @@ -3170,25 +3187,6 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; } else { engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - if (i915_semaphore_is_enabled(dev_priv)) { - /* - * The current semaphore is only applied on pre-gen8 - * platform. And there is no VCS2 ring on the pre-gen8 - * platform. So the semaphore between BCS and VCS2 is - * initialized as INVALID. Gen8 will initialize the - * sema between BCS and VCS2 later. 
- */ - engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR; - engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV; - engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; - engine->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE; - engine->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; - engine->semaphore.mbox.signal[RCS] = GEN6_RBSYNC; - engine->semaphore.mbox.signal[VCS] = GEN6_VBSYNC; - engine->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; - engine->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC; - engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; - } } return intel_init_ring_buffer(dev, engine); @@ -3216,18 +3214,6 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; engine->irq_get = hsw_vebox_get_irq; engine->irq_put = hsw_vebox_put_irq; - if (i915_semaphore_is_enabled(dev_priv)) { - engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER; - engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV; - engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB; - engine->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; - engine->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; - engine->semaphore.mbox.signal[RCS] = GEN6_RVESYNC; - engine->semaphore.mbox.signal[VCS] = GEN6_VVESYNC; - engine->semaphore.mbox.signal[BCS] = GEN6_BVESYNC; - engine->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; - engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; - } } return intel_init_ring_buffer(dev, engine); -- cgit v1.2.2 From 8d228911ffc08bfaa8224d160df8ca64e9dc8274 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Jun 2016 16:09:32 +0100 Subject: drm/i915: Trim some if-else braces Just a bit of cleanup after the previous refactoring. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1467212972-861-1-git-send-email-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_ringbuffer.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2080668d3630..4d61ea923154 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -3126,20 +3126,18 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) if (IS_GEN6(dev_priv)) engine->write_tail = gen6_bsd_ring_write_tail; engine->flush = gen6_bsd_ring_flush; - if (INTEL_GEN(dev_priv) >= 8) { + if (INTEL_GEN(dev_priv) >= 8) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; - } else { + else engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; - } } else { engine->mmio_base = BSD_RING_BASE; engine->flush = bsd_ring_flush; - if (IS_GEN5(dev_priv)) { + if (IS_GEN5(dev_priv)) engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; - } else { + else engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; - } } return intel_init_ring_buffer(dev, engine); @@ -3182,12 +3180,11 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); engine->flush = gen6_ring_flush; - if (INTEL_GEN(dev_priv) >= 8) { + if (INTEL_GEN(dev_priv) >= 8) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; - } else { + else engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - } return intel_init_ring_buffer(dev, engine); } -- cgit v1.2.2 From 6f7bef75d10e04ad62c0264614df4e013e5e4b67 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Jul 2016 09:18:12 
+0100 Subject: drm/i915/ringbuffer: Move all generic engine->dispatch_batchbuffer together Consolidate the block of default vfuncs for dispatching the batchbuffer. Just a minor tweak on top of Tvrtko's great job of tidying up the vfunc initialisation. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1467361093-20209-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4d61ea923154..caebe812d10f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2991,25 +2991,29 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; - if (INTEL_GEN(dev_priv) >= 8) { - engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + engine->add_request = i9xx_add_request; + if (INTEL_GEN(dev_priv) >= 6) engine->add_request = gen6_add_request; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } else if (INTEL_GEN(dev_priv) >= 6) { + + if (INTEL_GEN(dev_priv) >= 8) + engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + else if (INTEL_GEN(dev_priv) >= 6) engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; - engine->add_request = gen6_add_request; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } else { + else if (INTEL_GEN(dev_priv) >= 4) engine->dispatch_execbuffer = i965_dispatch_execbuffer; - engine->add_request = i9xx_add_request; - } + else if (IS_I830(dev_priv) || IS_845G(dev_priv)) + engine->dispatch_execbuffer = i830_dispatch_execbuffer; + else + engine->dispatch_execbuffer = i915_dispatch_execbuffer; if (INTEL_GEN(dev_priv) >= 8) { engine->irq_get = gen8_ring_get_irq; engine->irq_put = gen8_ring_put_irq; + engine->irq_seqno_barrier = gen6_seqno_barrier; } else if (INTEL_GEN(dev_priv) >= 6) { engine->irq_get = gen6_ring_get_irq; engine->irq_put = gen6_ring_put_irq; + engine->irq_seqno_barrier = gen6_seqno_barrier; } else if (INTEL_GEN(dev_priv) >= 5) { engine->irq_get = gen5_ring_get_irq; engine->irq_put = gen5_ring_put_irq; @@ -3069,10 +3073,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) if (IS_HASWELL(dev_priv)) engine->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; - else if (IS_I830(dev_priv) || IS_845G(dev_priv)) - engine->dispatch_execbuffer = i830_dispatch_execbuffer; - else if (INTEL_GEN(dev_priv) <= 3) - engine->dispatch_execbuffer = i915_dispatch_execbuffer; + engine->init_hw = init_render_ring; engine->cleanup = render_ring_cleanup; -- cgit v1.2.2 From ed00307893df2a8386307aba36e4a552b72ffb89 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Jul 2016 09:18:13 +0100 Subject: drm/i915/ringbuffer: Move all default irq vfuncs init to a separate func Just plonk all the default irq vfuncs together in one function to keep the initialisers of reasonable size. 
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1467361093-20209-2-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 43 ++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index caebe812d10f..24cdc920f4b4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2983,6 +2983,29 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, } } +static void intel_ring_init_irq(struct drm_i915_private *dev_priv, + struct intel_engine_cs *engine) +{ + if (INTEL_GEN(dev_priv) >= 8) { + engine->irq_get = gen8_ring_get_irq; + engine->irq_put = gen8_ring_put_irq; + engine->irq_seqno_barrier = gen6_seqno_barrier; + } else if (INTEL_GEN(dev_priv) >= 6) { + engine->irq_get = gen6_ring_get_irq; + engine->irq_put = gen6_ring_put_irq; + engine->irq_seqno_barrier = gen6_seqno_barrier; + } else if (INTEL_GEN(dev_priv) >= 5) { + engine->irq_get = gen5_ring_get_irq; + engine->irq_put = gen5_ring_put_irq; + } else if (INTEL_GEN(dev_priv) >= 3) { + engine->irq_get = i9xx_ring_get_irq; + engine->irq_put = i9xx_ring_put_irq; + } else { + engine->irq_get = i8xx_ring_get_irq; + engine->irq_put = i8xx_ring_put_irq; + } +} + static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { @@ -3006,25 +3029,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, else engine->dispatch_execbuffer = i915_dispatch_execbuffer; - if (INTEL_GEN(dev_priv) >= 8) { - engine->irq_get = gen8_ring_get_irq; - engine->irq_put = gen8_ring_put_irq; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } else if (INTEL_GEN(dev_priv) >= 6) { - engine->irq_get = gen6_ring_get_irq; - engine->irq_put = gen6_ring_put_irq; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } else if (INTEL_GEN(dev_priv) >= 5) { - engine->irq_get = gen5_ring_get_irq; - engine->irq_put = gen5_ring_put_irq; - } else if (INTEL_GEN(dev_priv) >= 3) { - engine->irq_get = i9xx_ring_get_irq; - engine->irq_put = i9xx_ring_put_irq; - } else { - engine->irq_get = i8xx_ring_get_irq; - engine->irq_put = i8xx_ring_put_irq; - } - + intel_ring_init_irq(dev_priv, engine); intel_ring_init_semaphores(dev_priv, engine); } -- cgit v1.2.2 From 688e6c7258164de86d626e8e983ca8d28015c263 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Jul 2016 17:23:15 +0100 Subject: drm/i915: Slaughter the thundering i915_wait_request herd One particularly stressful scenario consists of many independent tasks all competing for GPU time and waiting upon the results (e.g. realtime transcoding of many, many streams). One bottleneck in particular is that each client waits on its own results, but every client is woken up after every batchbuffer - hence the thunder of hooves as then every client must do its heavyweight dance to read a coherent seqno to see if it is the lucky one. Ideally, we only want one client to wake up after the interrupt and check its request for completion. Since the requests must retire in order, we can select the first client on the oldest request to be woken. Once that client has completed his wait, we can then wake up the next client and so on. 
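To make the chained wakeup scheme concrete, here is a rough, self-contained sketch of the idea (illustrative only - this is not the intel_breadcrumbs code the patch adds, and the sketch_* names are invented for the example): waiters live in a seqno-sorted rbtree, the user interrupt wakes only the oldest waiter, and a finished waiter hands the bottom-half role to its successor before leaving the tree.

#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct sketch_wait {
	struct rb_node node;
	struct task_struct *tsk;
	u32 seqno;
};

struct sketch_breadcrumbs {
	spinlock_t lock;
	struct rb_root waiters;		/* sorted by seqno */
	struct sketch_wait *first;	/* current bottom-half */
};

static void sketch_add_waiter(struct sketch_breadcrumbs *b,
			      struct sketch_wait *wait)
{
	struct rb_node **p = &b->waiters.rb_node, *parent = NULL;

	spin_lock_irq(&b->lock);
	while (*p) {
		struct sketch_wait *it;

		parent = *p;
		it = rb_entry(parent, struct sketch_wait, node);
		if ((s32)(wait->seqno - it->seqno) < 0)
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}
	rb_link_node(&wait->node, parent, p);
	rb_insert_color(&wait->node, &b->waiters);

	/* Only the oldest waiter is ever woken directly by the interrupt. */
	if (!b->first || (s32)(wait->seqno - b->first->seqno) < 0)
		b->first = wait;
	spin_unlock_irq(&b->lock);
}

/* Called by a waiter once its seqno has passed. */
static void sketch_remove_waiter(struct sketch_breadcrumbs *b,
				 struct sketch_wait *wait)
{
	spin_lock_irq(&b->lock);
	if (b->first == wait) {
		struct rb_node *next = rb_next(&wait->node);

		/* Hand the bottom-half role to the next oldest waiter. */
		b->first = next ? rb_entry(next, struct sketch_wait, node) : NULL;
		if (b->first)
			wake_up_process(b->first->tsk);
	}
	rb_erase(&wait->node, &b->waiters);
	spin_unlock_irq(&b->lock);
}

/* From the user interrupt: wake the single oldest waiter, not the herd. */
static void sketch_engine_wakeup(struct sketch_breadcrumbs *b)
{
	unsigned long flags;

	spin_lock_irqsave(&b->lock, flags);
	if (b->first)
		wake_up_process(b->first->tsk);
	spin_unlock_irqrestore(&b->lock, flags);
}

The real implementation additionally has to cope with missed interrupts, lockless completion checks and racing inserts/removals, which is what much of the v2-v19 changelog below is about.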
However, all clients then incur latency as every process in the chain may be delayed for scheduling - this may also then cause some priority inversion. To reduce the latency, when a client is added or removed from the list, we scan the tree for completed seqno and wake up all the completed waiters in parallel. Using igt/benchmarks/gem_latency, we can demonstrate this effect. The benchmark measures the number of GPU cycles between completion of a batch and the client waking up from a call to wait-ioctl. With many concurrent waiters, with each on a different request, we observe that the wakeup latency before the patch scales nearly linearly with the number of waiters (before external factors kick in making the scaling much worse). After applying the patch, we can see that only the single waiter for the request is being woken up, providing a constant wakeup latency for every operation. However, the situation is not quite as rosy for many waiters on the same request, though to the best of my knowledge this is much less likely in practice. Here, we can observe that the concurrent waiters incur extra latency from being woken up by the solitary bottom-half, rather than directly by the interrupt. This appears to be scheduler induced (having discounted adverse effects from having an rbtree walk/erase in the wakeup path); each additional wake_up_process() costs approximately 1us on a big core. Another effect of performing the secondary wakeups from the first bottom-half is the incurred delay this imposes on high-priority threads - rather than immediately returning to userspace and leaving the interrupt handler to wake the others. To offset the delay incurred with additional waiters on a request, we could use a hybrid scheme that did a quick read in the interrupt handler and dequeued all the completed waiters (incurring the overhead in the interrupt handler, not the best plan either as we then incur GPU submission latency) but we would still have to wake up the bottom-half every time to do the heavyweight slow read. Or we could only kick the waiters on the seqno with the same priority as the current task (i.e. in the realtime waiter scenario, only it is woken up immediately by the interrupt and simply queues the next waiter before returning to userspace, minimising its delay at the expense of the chain, and also reducing contention on its scheduler runqueue). This is effective at avoiding long pauses in the interrupt handler and at avoiding the extra latency in realtime/high-priority waiters. v2: Convert from a kworker per engine into a dedicated kthread for the bottom-half. v3: Rename request members and tweak comments. v4: Use a per-engine spinlock in the breadcrumbs bottom-half. v5: Fix race in locklessly checking waiter status and kicking the task on adding a new waiter. v6: Fix deciding when to force the timer to hide missing interrupts. v7: Move the bottom-half from the kthread to the first client process. v8: Reword a few comments. v9: Break the busy loop when the interrupt is unmasked or has fired. v10: Comments, unnecessary churn, better debugging from Tvrtko. v11: Wake all completed waiters on removing the current bottom-half to reduce the latency of waking up a herd of clients all waiting on the same request. v12: Rearrange missed-interrupt fault injection so that it works with igt/drv_missed_irq_hang. v13: Rename intel_breadcrumb and friends to intel_wait in preparation for signal handling.
v14: RCU commentary, assert_spin_locked. v15: Hide BUG_ON behind the compiler; report on gem_latency findings. v16: Sort seqno-groups by priority so that first-waiter has the highest task priority (and so avoid priority inversion). v17: Add waiters to post-mortem GPU hang state. v18: Return early for a completed wait after acquiring the spinlock. Avoids adding ourselves to the tree if the wait is already complete, and skips the awkward question of why we don't do completion wakeups for waits earlier than or equal to ourselves. v19: Prepare for init_breadcrumbs to fail. Later patches may want to allocate during init, so be prepared to propagate back the error code. Testcase: igt/gem_concurrent_blit Testcase: igt/benchmarks/gem_latency Signed-off-by: Chris Wilson Cc: "Rogozhkin, Dmitry V" Cc: "Gong, Zhipeng" Cc: Tvrtko Ursulin Cc: Dave Gordon Cc: "Goel, Akash" Reviewed-by: Tvrtko Ursulin #v18 Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-6-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 24cdc920f4b4..af50aa01bcd9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2394,7 +2394,9 @@ static int intel_init_ring_buffer(struct drm_device *dev, memset(engine->semaphore.sync_seqno, 0, sizeof(engine->semaphore.sync_seqno)); - init_waitqueue_head(&engine->irq_queue); + ret = intel_engine_init_breadcrumbs(engine); + if (ret) + goto error; /* We may need to do things with the shrinker which * require us to immediately switch back to the default @@ -2474,6 +2476,7 @@ void intel_cleanup_engine(struct intel_engine_cs *engine) i915_cmd_parser_fini_ring(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + intel_engine_fini_breadcrumbs(engine); intel_ring_context_unpin(dev_priv->kernel_context, engine); @@ -2676,6 +2679,13 @@ void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno) engine->last_submitted_seqno = seqno; engine->hangcheck.seqno = seqno; + + /* After manually advancing the seqno, fake the interrupt in case + * there are any waiters for that seqno. + */ + rcu_read_lock(); + intel_engine_wakeup(engine); + rcu_read_unlock(); } static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, -- cgit v1.2.2 From 1b7744e7ba4e4ad17b5910796c9b1ca74063df01 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Jul 2016 17:23:17 +0100 Subject: drm/i915: Use HWS for seqno tracking everywhere By using the same address for storing the HWS on every platform, we can remove the platform specific vfuncs and reduce the get-seqno routine to a single read of a cached memory location.
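As a reference point for the diff that follows, the "single read of a cached memory location" is the engine's hardware status page (HWS) slot that the ring_get_seqno()/pc_render_get_seqno() vfuncs removed below were wrapping. A rough sketch of what a completion check reduces to once every engine uses the HWS - intel_read_status_page() and I915_GEM_HWS_INDEX are the driver helpers already visible in the diffs, while the wrap-safe comparison is written out here on the assumption that it mirrors i915_seqno_passed():

/* Seqnos wrap, so compare them with signed arithmetic. */
static inline bool sketch_seqno_passed(u32 seq1, u32 seq2)
{
	return (s32)(seq1 - seq2) >= 0;
}

/* With the HWS used on every platform, "has this request completed?"
 * becomes one read of the engine's status page - no per-platform
 * get_seqno()/set_seqno() vfuncs and no kmapped scratch page needed.
 */
static inline bool sketch_request_completed(struct intel_engine_cs *engine,
					    u32 seqno)
{
	return sketch_seqno_passed(intel_read_status_page(engine,
							  I915_GEM_HWS_INDEX),
				   seqno);
}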
v2: Fix semaphore_passed() to look at the signaling engine (not the waiter's) Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-8-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 65 +++++++++------------------------ 1 file changed, 17 insertions(+), 48 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index af50aa01bcd9..02104fbf9045 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1367,19 +1367,17 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, return ret; for_each_engine_id(waiter, dev_priv, id) { - u32 seqno; u64 gtt_offset = signaller->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - seqno = i915_gem_request_get_seqno(signaller_req); intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6)); intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); intel_ring_emit(signaller, lower_32_bits(gtt_offset)); intel_ring_emit(signaller, upper_32_bits(gtt_offset)); - intel_ring_emit(signaller, seqno); + intel_ring_emit(signaller, signaller_req->seqno); intel_ring_emit(signaller, 0); intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | MI_SEMAPHORE_TARGET(waiter->hw_id)); @@ -1408,18 +1406,16 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, return ret; for_each_engine_id(waiter, dev_priv, id) { - u32 seqno; u64 gtt_offset = signaller->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - seqno = i915_gem_request_get_seqno(signaller_req); intel_ring_emit(signaller, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); intel_ring_emit(signaller, lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT); intel_ring_emit(signaller, upper_32_bits(gtt_offset)); - intel_ring_emit(signaller, seqno); + intel_ring_emit(signaller, signaller_req->seqno); intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | MI_SEMAPHORE_TARGET(waiter->hw_id)); intel_ring_emit(signaller, 0); @@ -1450,11 +1446,9 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req, i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[id]; if (i915_mmio_reg_valid(mbox_reg)) { - u32 seqno = i915_gem_request_get_seqno(signaller_req); - intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit_reg(signaller, mbox_reg); - intel_ring_emit(signaller, seqno); + intel_ring_emit(signaller, signaller_req->seqno); } } @@ -1490,7 +1484,7 @@ gen6_add_request(struct drm_i915_gem_request *req) intel_ring_emit(engine, MI_STORE_DWORD_INDEX); intel_ring_emit(engine, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(engine, i915_gem_request_get_seqno(req)); + intel_ring_emit(engine, req->seqno); intel_ring_emit(engine, MI_USER_INTERRUPT); __intel_ring_advance(engine); @@ -1628,7 +1622,9 @@ static int pc_render_add_request(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + u32 addr = engine->status_page.gfx_addr + + (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); + u32 scratch_addr = addr; int ret; /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently @@ -1644,12 +1640,12 @@ pc_render_add_request(struct drm_i915_gem_request *req) return ret; intel_ring_emit(engine, - GFX_OP_PIPE_CONTROL(4) | 
PIPE_CONTROL_QW_WRITE | + GFX_OP_PIPE_CONTROL(4) | + PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - intel_ring_emit(engine, - engine->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(engine, i915_gem_request_get_seqno(req)); + intel_ring_emit(engine, addr | PIPE_CONTROL_GLOBAL_GTT); + intel_ring_emit(engine, req->seqno); intel_ring_emit(engine, 0); PIPE_CONTROL_FLUSH(engine, scratch_addr); scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */ @@ -1668,9 +1664,8 @@ pc_render_add_request(struct drm_i915_gem_request *req) PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | PIPE_CONTROL_NOTIFY); - intel_ring_emit(engine, - engine->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(engine, i915_gem_request_get_seqno(req)); + intel_ring_emit(engine, addr | PIPE_CONTROL_GLOBAL_GTT); + intel_ring_emit(engine, req->seqno); intel_ring_emit(engine, 0); __intel_ring_advance(engine); @@ -1702,30 +1697,6 @@ gen6_seqno_barrier(struct intel_engine_cs *engine) spin_unlock_irq(&dev_priv->uncore.lock); } -static u32 -ring_get_seqno(struct intel_engine_cs *engine) -{ - return intel_read_status_page(engine, I915_GEM_HWS_INDEX); -} - -static void -ring_set_seqno(struct intel_engine_cs *engine, u32 seqno) -{ - intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); -} - -static u32 -pc_render_get_seqno(struct intel_engine_cs *engine) -{ - return engine->scratch.cpu_page[0]; -} - -static void -pc_render_set_seqno(struct intel_engine_cs *engine, u32 seqno) -{ - engine->scratch.cpu_page[0] = seqno; -} - static bool gen5_ring_get_irq(struct intel_engine_cs *engine) { @@ -1856,7 +1827,7 @@ i9xx_add_request(struct drm_i915_gem_request *req) intel_ring_emit(engine, MI_STORE_DWORD_INDEX); intel_ring_emit(engine, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(engine, i915_gem_request_get_seqno(req)); + intel_ring_emit(engine, req->seqno); intel_ring_emit(engine, MI_USER_INTERRUPT); __intel_ring_advance(engine); @@ -2675,7 +2646,9 @@ void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno) memset(engine->semaphore.sync_seqno, 0, sizeof(engine->semaphore.sync_seqno)); - engine->set_seqno(engine, seqno); + intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); engine->last_submitted_seqno = seqno; engine->hangcheck.seqno = seqno; @@ -3021,8 +2994,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, { engine->init_hw = init_ring_common; engine->write_tail = ring_write_tail; - engine->get_seqno = ring_get_seqno; - engine->set_seqno = ring_set_seqno; engine->add_request = i9xx_add_request; if (INTEL_GEN(dev_priv) >= 6) @@ -3074,8 +3045,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) } else if (IS_GEN5(dev_priv)) { engine->add_request = pc_render_add_request; engine->flush = gen4_render_ring_flush; - engine->get_seqno = pc_render_get_seqno; - engine->set_seqno = pc_render_set_seqno; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; } else { -- cgit v1.2.2 From f8291952bd8c10e2dfb6fc0419ed747922ab90ea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Jul 2016 17:23:18 +0100 Subject: drm/i915: Stop mapping the scratch page into CPU space After the elimination of using the scratch page for Ironlake's breadcrumb, we no longer need to kmap the object. 
We therefore can move it into the high unmappable space and do not need to force the object to be coherent (i.e. snooped on !llc platforms). Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-9-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 40 +++++++++------------------------ 1 file changed, 11 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 02104fbf9045..7ccfb1e57d12 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -648,58 +648,40 @@ out: return ret; } -void -intel_fini_pipe_control(struct intel_engine_cs *engine) +void intel_fini_pipe_control(struct intel_engine_cs *engine) { if (engine->scratch.obj == NULL) return; - if (INTEL_GEN(engine->i915) >= 5) { - kunmap(sg_page(engine->scratch.obj->pages->sgl)); - i915_gem_object_ggtt_unpin(engine->scratch.obj); - } - + i915_gem_object_ggtt_unpin(engine->scratch.obj); drm_gem_object_unreference(&engine->scratch.obj->base); engine->scratch.obj = NULL; } -int -intel_init_pipe_control(struct intel_engine_cs *engine) +int intel_init_pipe_control(struct intel_engine_cs *engine) { + struct drm_i915_gem_object *obj; int ret; WARN_ON(engine->scratch.obj); - engine->scratch.obj = i915_gem_object_create(engine->i915->dev, 4096); - if (IS_ERR(engine->scratch.obj)) { - DRM_ERROR("Failed to allocate seqno page\n"); - ret = PTR_ERR(engine->scratch.obj); - engine->scratch.obj = NULL; + obj = i915_gem_object_create(engine->i915->dev, 4096); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate scratch page\n"); + ret = PTR_ERR(obj); goto err; } - ret = i915_gem_object_set_cache_level(engine->scratch.obj, - I915_CACHE_LLC); + ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_HIGH); if (ret) goto err_unref; - ret = i915_gem_obj_ggtt_pin(engine->scratch.obj, 4096, 0); - if (ret) - goto err_unref; - - engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(engine->scratch.obj); - engine->scratch.cpu_page = kmap(sg_page(engine->scratch.obj->pages->sgl)); - if (engine->scratch.cpu_page == NULL) { - ret = -ENOMEM; - goto err_unpin; - } - + engine->scratch.obj = obj; + engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj); DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", engine->name, engine->scratch.gtt_offset); return 0; -err_unpin: - i915_gem_object_ggtt_unpin(engine->scratch.obj); err_unref: drm_gem_object_unreference(&engine->scratch.obj->base); err: -- cgit v1.2.2 From de8fe1663a5e1e6d83b21caf000f462483a8a559 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Jul 2016 17:23:19 +0100 Subject: drm/i915: Allocate scratch page from stolen With the last direct CPU access to the scratch page removed, we can now allocate it from our small amount of reserved system pages (stolen memory). 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-10-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7ccfb1e57d12..3bc968b7aedc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -665,7 +665,9 @@ int intel_init_pipe_control(struct intel_engine_cs *engine) WARN_ON(engine->scratch.obj); - obj = i915_gem_object_create(engine->i915->dev, 4096); + obj = i915_gem_object_create_stolen(engine->i915->dev, 4096); + if (!obj) + obj = i915_gem_object_create(engine->i915->dev, 4096); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate scratch page\n"); ret = PTR_ERR(obj); -- cgit v1.2.2 From 7d5ea80720a8e53ff4ea309708cef2a7a0e163c7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Jul 2016 17:23:20 +0100 Subject: drm/i915: Refactor scratch object allocation for gen2 w/a buffer The gen2 w/a buffer is stuffed into the same slot as the gen5+ scratch buffer. If we pass in the size we want to allocate for the scratch buffer, both callers can use the same routine. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-11-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3bc968b7aedc..25d616d114e4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -658,16 +658,16 @@ void intel_fini_pipe_control(struct intel_engine_cs *engine) engine->scratch.obj = NULL; } -int intel_init_pipe_control(struct intel_engine_cs *engine) +int intel_init_pipe_control(struct intel_engine_cs *engine, int size) { struct drm_i915_gem_object *obj; int ret; WARN_ON(engine->scratch.obj); - obj = i915_gem_object_create_stolen(engine->i915->dev, 4096); + obj = i915_gem_object_create_stolen(engine->i915->dev, size); if (!obj) - obj = i915_gem_object_create(engine->i915->dev, 4096); + obj = i915_gem_object_create(engine->i915->dev, size); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate scratch page\n"); ret = PTR_ERR(obj); @@ -3002,7 +3002,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *engine = &dev_priv->engine[RCS]; - struct drm_i915_gem_object *obj; int ret; engine->name = "render ring"; @@ -3045,31 +3044,16 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->init_hw = init_render_ring; engine->cleanup = render_ring_cleanup; - /* Workaround batchbuffer to combat CS tlb bug. 
*/ - if (HAS_BROKEN_CS_TLB(dev_priv)) { - obj = i915_gem_object_create(dev, I830_WA_SIZE); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate batch bo\n"); - return PTR_ERR(obj); - } - - ret = i915_gem_obj_ggtt_pin(obj, 0, 0); - if (ret != 0) { - drm_gem_object_unreference(&obj->base); - DRM_ERROR("Failed to ping batch bo\n"); - return ret; - } - - engine->scratch.obj = obj; - engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj); - } - ret = intel_init_ring_buffer(dev, engine); if (ret) return ret; if (INTEL_GEN(dev_priv) >= 5) { - ret = intel_init_pipe_control(engine); + ret = intel_init_pipe_control(engine, 4096); + if (ret) + return ret; + } else if (HAS_BROKEN_CS_TLB(dev_priv)) { + ret = intel_init_pipe_control(engine, I830_WA_SIZE); if (ret) return ret; } -- cgit v1.2.2 From f8973c217f07903247d222ab92ad37e2529aff2e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Jul 2016 17:23:21 +0100 Subject: drm/i915: Add a delay between interrupt and inspecting the final seqno (ilk) On Ironlake, there is no command nor register to ensure that the write from a MI_STORE command is completed (and coherent on the CPU) before the command parser continues. This means that the ordering between the seqno write and the subsequent user interrupt is undefined (like gen6+). So to ensure that the seqno write is completed after the final user interrupt we need to delay the read sufficiently to allow the write to complete. This delay is undefined by the bspec, and empirically requires 75us even though a register read combined with a clflush is less than 500ns. Hence, the delay is due to an on-chip buffer rather than the latency of the write to memory. Note that the render ring controls this by filling the PIPE_CONTROL fifo with stalling commands that force the earliest pipe-control with the seqno to be completed before the command parser continues. Given that we need a barrier operation for BSD, we may as well forgo the extra per-batch latency by using a common per-interrupt barrier. Studying the impact of adding the usleep shows that its cost for both sequences of, and individual, synchronous no-op batches is negligible for the media engine (where the write now is unordered with the interrupt). Converting the render engine from the current glutton of pipe-controls over to the per-interrupt delays speeds up both the sequential and individual synchronous no-ops by 20% and 60%, respectively. This speed-up, of about 20%, holds even when looking at the throughput of small copies (4KiB->4MiB), both serial and synchronous. This is because, despite adding a significant delay to the interrupt, in all likelihood we will see the seqno write without having to apply the barrier (only in the rare corner cases where the write is still delayed at the last required read is the delay necessary).
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94307 Testcase: igt/gem_sync #ilk Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-12-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 81 +++++++-------------------------- 1 file changed, 17 insertions(+), 64 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 25d616d114e4..3bd598673d88 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1593,67 +1593,22 @@ gen6_ring_sync(struct drm_i915_gem_request *waiter_req, return 0; } -#define PIPE_CONTROL_FLUSH(ring__, addr__) \ -do { \ - intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \ - PIPE_CONTROL_DEPTH_STALL); \ - intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT); \ - intel_ring_emit(ring__, 0); \ - intel_ring_emit(ring__, 0); \ -} while (0) - -static int -pc_render_add_request(struct drm_i915_gem_request *req) +static void +gen5_seqno_barrier(struct intel_engine_cs *ring) { - struct intel_engine_cs *engine = req->engine; - u32 addr = engine->status_page.gfx_addr + - (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - u32 scratch_addr = addr; - int ret; - - /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently - * incoherent with writes to memory, i.e. completely fubar, - * so we need to use PIPE_NOTIFY instead. + /* MI_STORE are internally buffered by the GPU and not flushed + * either by MI_FLUSH or SyncFlush or any other combination of + * MI commands. * - * However, we also need to workaround the qword write - * incoherence by flushing the 6 PIPE_NOTIFY buffers out to - * memory before requesting an interrupt. + * "Only the submission of the store operation is guaranteed. + * The write result will be complete (coherent) some time later + * (this is practically a finite period but there is no guaranteed + * latency)." + * + * Empirically, we observe that we need a delay of at least 75us to + * be sure that the seqno write is visible by the CPU. 
*/ - ret = intel_ring_begin(req, 32); - if (ret) - return ret; - - intel_ring_emit(engine, - GFX_OP_PIPE_CONTROL(4) | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_WRITE_FLUSH | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - intel_ring_emit(engine, addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(engine, req->seqno); - intel_ring_emit(engine, 0); - PIPE_CONTROL_FLUSH(engine, scratch_addr); - scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */ - PIPE_CONTROL_FLUSH(engine, scratch_addr); - scratch_addr += 2 * CACHELINE_BYTES; - PIPE_CONTROL_FLUSH(engine, scratch_addr); - scratch_addr += 2 * CACHELINE_BYTES; - PIPE_CONTROL_FLUSH(engine, scratch_addr); - scratch_addr += 2 * CACHELINE_BYTES; - PIPE_CONTROL_FLUSH(engine, scratch_addr); - scratch_addr += 2 * CACHELINE_BYTES; - PIPE_CONTROL_FLUSH(engine, scratch_addr); - - intel_ring_emit(engine, - GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_WRITE_FLUSH | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_NOTIFY); - intel_ring_emit(engine, addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(engine, req->seqno); - intel_ring_emit(engine, 0); - __intel_ring_advance(engine); - - return 0; + usleep_range(125, 250); } static void @@ -2964,6 +2919,7 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv, } else if (INTEL_GEN(dev_priv) >= 5) { engine->irq_get = gen5_ring_get_irq; engine->irq_put = gen5_ring_put_irq; + engine->irq_seqno_barrier = gen5_seqno_barrier; } else if (INTEL_GEN(dev_priv) >= 3) { engine->irq_get = i9xx_ring_get_irq; engine->irq_put = i9xx_ring_put_irq; @@ -3012,11 +2968,12 @@ int intel_init_render_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); + engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; + if (INTEL_GEN(dev_priv) >= 8) { engine->init_context = intel_rcs_ctx_init; engine->add_request = gen8_render_add_request; engine->flush = gen8_render_ring_flush; - engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; if (i915_semaphore_is_enabled(dev_priv)) engine->semaphore.signal = gen8_rcs_signal; } else if (INTEL_GEN(dev_priv) >= 6) { @@ -3024,12 +2981,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->flush = gen7_render_ring_flush; if (IS_GEN6(dev_priv)) engine->flush = gen6_render_ring_flush; - engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; } else if (IS_GEN5(dev_priv)) { - engine->add_request = pc_render_add_request; engine->flush = gen4_render_ring_flush; - engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT | - GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; } else { if (INTEL_GEN(dev_priv) < 4) engine->flush = gen2_render_ring_flush; @@ -3048,7 +3001,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) if (ret) return ret; - if (INTEL_GEN(dev_priv) >= 5) { + if (INTEL_GEN(dev_priv) >= 6) { ret = intel_init_pipe_control(engine, 4096); if (ret) return ret; -- cgit v1.2.2 From 31bb59cc01fda94e55eca7304e0e745908c3bec6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Jul 2016 17:23:27 +0100 Subject: drm/i915: Move the get/put irq locking into the caller With only a single callsite for intel_engine_cs->irq_get and ->irq_put, we can reduce the code size by moving the common preamble into the caller, and we can also eliminate the reference counting. For completeness, as we are no longer doing reference counting on irq, rename the get/put vfunctions to enable/disable respectively and are able to review the use of posting reads. We only require the serialisation with hardware when enabling the interrupt (i.e. 
so we cannot miss an interrupt by going to sleep before the hardware truly enables it). Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-18-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 237 ++++++++++---------------------- 1 file changed, 70 insertions(+), 167 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3bd598673d88..994f7616a58b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1636,103 +1636,54 @@ gen6_seqno_barrier(struct intel_engine_cs *engine) spin_unlock_irq(&dev_priv->uncore.lock); } -static bool -gen5_ring_get_irq(struct intel_engine_cs *engine) +static void +gen5_irq_enable(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - - if (WARN_ON(!intel_irqs_enabled(dev_priv))) - return false; - - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (engine->irq_refcount++ == 0) - gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask); - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); - - return true; + gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask); } static void -gen5_ring_put_irq(struct intel_engine_cs *engine) +gen5_irq_disable(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (--engine->irq_refcount == 0) - gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask); - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask); } -static bool -i9xx_ring_get_irq(struct intel_engine_cs *engine) +static void +i9xx_irq_enable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - - if (!intel_irqs_enabled(dev_priv)) - return false; - - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (engine->irq_refcount++ == 0) { - dev_priv->irq_mask &= ~engine->irq_enable_mask; - I915_WRITE(IMR, dev_priv->irq_mask); - POSTING_READ(IMR); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); - return true; + dev_priv->irq_mask &= ~engine->irq_enable_mask; + I915_WRITE(IMR, dev_priv->irq_mask); + POSTING_READ_FW(RING_IMR(engine->mmio_base)); } static void -i9xx_ring_put_irq(struct intel_engine_cs *engine) +i9xx_irq_disable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (--engine->irq_refcount == 0) { - dev_priv->irq_mask |= engine->irq_enable_mask; - I915_WRITE(IMR, dev_priv->irq_mask); - POSTING_READ(IMR); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + dev_priv->irq_mask |= engine->irq_enable_mask; + I915_WRITE(IMR, dev_priv->irq_mask); } -static bool -i8xx_ring_get_irq(struct intel_engine_cs *engine) +static void +i8xx_irq_enable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - if (!intel_irqs_enabled(dev_priv)) - return false; - - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (engine->irq_refcount++ == 0) { - dev_priv->irq_mask &= ~engine->irq_enable_mask; - I915_WRITE16(IMR, dev_priv->irq_mask); - POSTING_READ16(IMR); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); - - return true; + dev_priv->irq_mask &= ~engine->irq_enable_mask; + I915_WRITE16(IMR, 
dev_priv->irq_mask); + POSTING_READ16(RING_IMR(engine->mmio_base)); } static void -i8xx_ring_put_irq(struct intel_engine_cs *engine) +i8xx_irq_disable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (--engine->irq_refcount == 0) { - dev_priv->irq_mask |= engine->irq_enable_mask; - I915_WRITE16(IMR, dev_priv->irq_mask); - POSTING_READ16(IMR); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + dev_priv->irq_mask |= engine->irq_enable_mask; + I915_WRITE16(IMR, dev_priv->irq_mask); } static int @@ -1773,122 +1724,74 @@ i9xx_add_request(struct drm_i915_gem_request *req) return 0; } -static bool -gen6_ring_get_irq(struct intel_engine_cs *engine) +static void +gen6_irq_enable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - - if (WARN_ON(!intel_irqs_enabled(dev_priv))) - return false; - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (engine->irq_refcount++ == 0) { - if (HAS_L3_DPF(dev_priv) && engine->id == RCS) - I915_WRITE_IMR(engine, - ~(engine->irq_enable_mask | - GT_PARITY_ERROR(dev_priv))); - else - I915_WRITE_IMR(engine, ~engine->irq_enable_mask); - gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); - - return true; + if (HAS_L3_DPF(dev_priv) && engine->id == RCS) + I915_WRITE_IMR(engine, + ~(engine->irq_enable_mask | + GT_PARITY_ERROR(dev_priv))); + else + I915_WRITE_IMR(engine, ~engine->irq_enable_mask); + gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask); } static void -gen6_ring_put_irq(struct intel_engine_cs *engine) +gen6_irq_disable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (--engine->irq_refcount == 0) { - if (HAS_L3_DPF(dev_priv) && engine->id == RCS) - I915_WRITE_IMR(engine, ~GT_PARITY_ERROR(dev_priv)); - else - I915_WRITE_IMR(engine, ~0); - gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + if (HAS_L3_DPF(dev_priv) && engine->id == RCS) + I915_WRITE_IMR(engine, ~GT_PARITY_ERROR(dev_priv)); + else + I915_WRITE_IMR(engine, ~0); + gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask); } -static bool -hsw_vebox_get_irq(struct intel_engine_cs *engine) +static void +hsw_vebox_irq_enable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - - if (WARN_ON(!intel_irqs_enabled(dev_priv))) - return false; - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (engine->irq_refcount++ == 0) { - I915_WRITE_IMR(engine, ~engine->irq_enable_mask); - gen6_enable_pm_irq(dev_priv, engine->irq_enable_mask); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); - - return true; + I915_WRITE_IMR(engine, ~engine->irq_enable_mask); + gen6_enable_pm_irq(dev_priv, engine->irq_enable_mask); } static void -hsw_vebox_put_irq(struct intel_engine_cs *engine) +hsw_vebox_irq_disable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (--engine->irq_refcount == 0) { - I915_WRITE_IMR(engine, ~0); - gen6_disable_pm_irq(dev_priv, engine->irq_enable_mask); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + I915_WRITE_IMR(engine, ~0); + gen6_disable_pm_irq(dev_priv, engine->irq_enable_mask); } -static bool 
-gen8_ring_get_irq(struct intel_engine_cs *engine) +static void +gen8_irq_enable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - if (WARN_ON(!intel_irqs_enabled(dev_priv))) - return false; - - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (engine->irq_refcount++ == 0) { - if (HAS_L3_DPF(dev_priv) && engine->id == RCS) { - I915_WRITE_IMR(engine, - ~(engine->irq_enable_mask | - GT_RENDER_L3_PARITY_ERROR_INTERRUPT)); - } else { - I915_WRITE_IMR(engine, ~engine->irq_enable_mask); - } - POSTING_READ(RING_IMR(engine->mmio_base)); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); - - return true; + if (HAS_L3_DPF(dev_priv) && engine->id == RCS) + I915_WRITE_IMR(engine, + ~(engine->irq_enable_mask | + GT_RENDER_L3_PARITY_ERROR_INTERRUPT)); + else + I915_WRITE_IMR(engine, ~engine->irq_enable_mask); + POSTING_READ_FW(RING_IMR(engine->mmio_base)); } static void -gen8_ring_put_irq(struct intel_engine_cs *engine) +gen8_irq_disable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (--engine->irq_refcount == 0) { - if (HAS_L3_DPF(dev_priv) && engine->id == RCS) { - I915_WRITE_IMR(engine, - ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT); - } else { - I915_WRITE_IMR(engine, ~0); - } - POSTING_READ(RING_IMR(engine->mmio_base)); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + if (HAS_L3_DPF(dev_priv) && engine->id == RCS) + I915_WRITE_IMR(engine, + ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT); + else + I915_WRITE_IMR(engine, ~0); } static int @@ -2909,23 +2812,23 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { if (INTEL_GEN(dev_priv) >= 8) { - engine->irq_get = gen8_ring_get_irq; - engine->irq_put = gen8_ring_put_irq; + engine->irq_enable = gen8_irq_enable; + engine->irq_disable = gen8_irq_disable; engine->irq_seqno_barrier = gen6_seqno_barrier; } else if (INTEL_GEN(dev_priv) >= 6) { - engine->irq_get = gen6_ring_get_irq; - engine->irq_put = gen6_ring_put_irq; + engine->irq_enable = gen6_irq_enable; + engine->irq_disable = gen6_irq_disable; engine->irq_seqno_barrier = gen6_seqno_barrier; } else if (INTEL_GEN(dev_priv) >= 5) { - engine->irq_get = gen5_ring_get_irq; - engine->irq_put = gen5_ring_put_irq; + engine->irq_enable = gen5_irq_enable; + engine->irq_disable = gen5_irq_disable; engine->irq_seqno_barrier = gen5_seqno_barrier; } else if (INTEL_GEN(dev_priv) >= 3) { - engine->irq_get = i9xx_ring_get_irq; - engine->irq_put = i9xx_ring_put_irq; + engine->irq_enable = i9xx_irq_enable; + engine->irq_disable = i9xx_irq_disable; } else { - engine->irq_get = i8xx_ring_get_irq; - engine->irq_put = i8xx_ring_put_irq; + engine->irq_enable = i8xx_irq_enable; + engine->irq_disable = i8xx_irq_disable; } } @@ -3115,8 +3018,8 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; } else { engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; - engine->irq_get = hsw_vebox_get_irq; - engine->irq_put = hsw_vebox_put_irq; + engine->irq_enable = hsw_vebox_irq_enable; + engine->irq_disable = hsw_vebox_irq_disable; } return intel_init_ring_buffer(dev, engine); -- cgit v1.2.2 From 61ff75ac20ffa6d0039f643234d0c5244070fb15 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Jul 2016 17:23:28 +0100 Subject: drm/i915: Simplify enabling user-interrupts with L3-remapping Borrow the idea from intel_lrc.c to precompute the mask of interrupts we 
wish to always enable to avoid having lots of conditionals inside the interrupt enabling. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-19-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 34 +++++++++++---------------------- 1 file changed, 11 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 994f7616a58b..a924a9f9eb4d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1313,8 +1313,7 @@ static int init_render_ring(struct intel_engine_cs *engine) if (IS_GEN(dev_priv, 6, 7)) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - if (HAS_L3_DPF(dev_priv)) - I915_WRITE_IMR(engine, ~GT_PARITY_ERROR(dev_priv)); + I915_WRITE_IMR(engine, ~engine->irq_keep_mask); return init_workarounds_ring(engine); } @@ -1729,12 +1728,9 @@ gen6_irq_enable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (HAS_L3_DPF(dev_priv) && engine->id == RCS) - I915_WRITE_IMR(engine, - ~(engine->irq_enable_mask | - GT_PARITY_ERROR(dev_priv))); - else - I915_WRITE_IMR(engine, ~engine->irq_enable_mask); + I915_WRITE_IMR(engine, + ~(engine->irq_enable_mask | + engine->irq_keep_mask)); gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask); } @@ -1743,10 +1739,7 @@ gen6_irq_disable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (HAS_L3_DPF(dev_priv) && engine->id == RCS) - I915_WRITE_IMR(engine, ~GT_PARITY_ERROR(dev_priv)); - else - I915_WRITE_IMR(engine, ~0); + I915_WRITE_IMR(engine, ~engine->irq_keep_mask); gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask); } @@ -1773,12 +1766,9 @@ gen8_irq_enable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (HAS_L3_DPF(dev_priv) && engine->id == RCS) - I915_WRITE_IMR(engine, - ~(engine->irq_enable_mask | - GT_RENDER_L3_PARITY_ERROR_INTERRUPT)); - else - I915_WRITE_IMR(engine, ~engine->irq_enable_mask); + I915_WRITE_IMR(engine, + ~(engine->irq_enable_mask | + engine->irq_keep_mask)); POSTING_READ_FW(RING_IMR(engine->mmio_base)); } @@ -1787,11 +1777,7 @@ gen8_irq_disable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (HAS_L3_DPF(dev_priv) && engine->id == RCS) - I915_WRITE_IMR(engine, - ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT); - else - I915_WRITE_IMR(engine, ~0); + I915_WRITE_IMR(engine, ~engine->irq_keep_mask); } static int @@ -2872,6 +2858,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; + if (HAS_L3_DPF(dev_priv)) + engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; if (INTEL_GEN(dev_priv) >= 8) { engine->init_context = intel_rcs_ctx_init; -- cgit v1.2.2 From 7b4d3a16dd97be0ebc793ea046b9af9d5c9b1b1a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Jul 2016 08:08:37 +0100 Subject: drm/i915: Remove stop-rings debugfs interface Now that we have (near) universal GPU recovery code, we can inject a real hang from userspace and not need any fakery. Not only does this mean that the testing is far more realistic, but we can simplify the kernel in the process. 
Signed-off-by: Chris Wilson Reviewed-by: Arun Siluvery Link: http://patchwork.freedesktop.org/patch/msgid/1467616119-4093-7-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a924a9f9eb4d..3fafc35d99c1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -58,18 +58,10 @@ void intel_ring_update_space(struct intel_ringbuffer *ringbuf) ringbuf->tail, ringbuf->size); } -bool intel_engine_stopped(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - return dev_priv->gpu_error.stop_rings & intel_engine_flag(engine); -} - static void __intel_ring_advance(struct intel_engine_cs *engine) { struct intel_ringbuffer *ringbuf = engine->buffer; ringbuf->tail &= ringbuf->size - 1; - if (intel_engine_stopped(engine)) - return; engine->write_tail(engine, ringbuf->tail); } -- cgit v1.2.2 From fac5e23e3c385fde41aab4a23bc50c8c15ad4d00 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Jul 2016 11:34:36 +0100 Subject: drm/i915: Mass convert dev->dev_private to to_i915(dev) Since we now subclass struct drm_device, we can save pointer dances by noting the equivalence of struct drm_device and struct drm_i915_private, i.e. by using to_i915(). text data bss dec hex filename 1073824 4562 416 1078802 107612 drivers/gpu/drm/i915/i915.ko 1068976 4562 416 1073954 106322 drivers/gpu/drm/i915/i915.ko Created by the coccinelle script: @@ expression E; identifier p; @@ - struct drm_i915_private *p = E->dev_private; + struct drm_i915_private *p = to_i915(E); Signed-off-by: Chris Wilson Reviewed-by: Dave Gordon Link: http://patchwork.freedesktop.org/patch/msgid/1467628477-25379-1-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3fafc35d99c1..428ed62dc5de 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2837,7 +2837,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, int intel_init_render_ring_buffer(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_engine_cs *engine = &dev_priv->engine[RCS]; int ret; @@ -2899,7 +2899,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) int intel_init_bsd_ring_buffer(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_engine_cs *engine = &dev_priv->engine[VCS]; engine->name = "bsd ring"; @@ -2937,7 +2937,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) */ int intel_init_bsd2_ring_buffer(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_engine_cs *engine = &dev_priv->engine[VCS2]; engine->name = "bsd2 ring"; @@ -2957,7 +2957,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) int intel_init_blt_ring_buffer(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = 
to_i915(dev); struct intel_engine_cs *engine = &dev_priv->engine[BCS]; engine->name = "blitter ring"; @@ -2980,7 +2980,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) int intel_init_vebox_ring_buffer(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_engine_cs *engine = &dev_priv->engine[VECS]; engine->name = "video enhancement ring"; -- cgit v1.2.2 From 91c8a326a192117219d5b9b980244c3662e35404 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Jul 2016 10:40:23 +0100 Subject: drm/i915: Convert dev_priv->dev backpointers to dev_priv->drm Since drm_i915_private is now a subclass of drm_device we do not need to chase the drm_i915_private->dev backpointer and can instead simply access drm_i915_private->drm directly. text data bss dec hex filename 1068757 4565 416 1073738 10624a drivers/gpu/drm/i915/i915.ko 1066949 4565 416 1071930 105b3a drivers/gpu/drm/i915/i915.ko Created by the coccinelle script: @@ struct drm_i915_private *d; identifier i; @@ ( - d->dev->i + d->drm.i | - d->dev + &d->drm ) and for good measure the dev_priv->dev backpointer was removed entirely. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/1467711623-2905-4-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 428ed62dc5de..61e00bf9e87f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -657,9 +657,9 @@ int intel_init_pipe_control(struct intel_engine_cs *engine, int size) WARN_ON(engine->scratch.obj); - obj = i915_gem_object_create_stolen(engine->i915->dev, size); + obj = i915_gem_object_create_stolen(&engine->i915->drm, size); if (!obj) - obj = i915_gem_object_create(engine->i915->dev, size); + obj = i915_gem_object_create(&engine->i915->drm, size); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate scratch page\n"); ret = PTR_ERR(obj); @@ -1888,7 +1888,7 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine) if (!dev_priv->status_page_dmah) return; - drm_pci_free(dev_priv->dev, dev_priv->status_page_dmah); + drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah); engine->status_page.page_addr = NULL; } @@ -1914,7 +1914,7 @@ static int init_status_page(struct intel_engine_cs *engine) unsigned flags; int ret; - obj = i915_gem_object_create(engine->i915->dev, 4096); + obj = i915_gem_object_create(&engine->i915->drm, 4096); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate status page\n"); return PTR_ERR(obj); @@ -1963,7 +1963,7 @@ static int init_phys_status_page(struct intel_engine_cs *engine) if (!dev_priv->status_page_dmah) { dev_priv->status_page_dmah = - drm_pci_alloc(dev_priv->dev, PAGE_SIZE, PAGE_SIZE); + drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); if (!dev_priv->status_page_dmah) return -ENOMEM; } @@ -2096,7 +2096,7 @@ intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size) ring->last_retired_head = -1; intel_ring_update_space(ring); - ret = intel_alloc_ringbuffer_obj(engine->i915->dev, ring); + ret = intel_alloc_ringbuffer_obj(&engine->i915->drm, ring); if (ret) { DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n", engine->name, ret); @@ -2122,7 +2122,7 @@ static int 
intel_ring_context_pin(struct i915_gem_context *ctx, struct intel_context *ce = &ctx->engine[engine->id]; int ret; - lockdep_assert_held(&ctx->i915->dev->struct_mutex); + lockdep_assert_held(&ctx->i915->drm.struct_mutex); if (ce->pin_count++) return 0; @@ -2156,7 +2156,7 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx, { struct intel_context *ce = &ctx->engine[engine->id]; - lockdep_assert_held(&ctx->i915->dev->struct_mutex); + lockdep_assert_held(&ctx->i915->drm.struct_mutex); if (--ce->pin_count) return; @@ -2696,7 +2696,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, return; if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) { - obj = i915_gem_object_create(dev_priv->dev, 4096); + obj = i915_gem_object_create(&dev_priv->drm, 4096); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n"); i915.semaphores = 0; -- cgit v1.2.2