author	Chris Wilson <chris@chris-wilson.co.uk>	2017-11-23 10:26:30 -0500
committer	Chris Wilson <chris@chris-wilson.co.uk>	2017-11-23 11:12:04 -0500
commit	b1c24a6137af11fca49192a42face03cacbd7fc5 (patch)
tree	557beb020f4447a4b66262864d5596cc46b50ef2
parent	c83a8d4a2ec93c53f78e109dee5e21953485cbfd (diff)
drm/i915: Unwind incomplete legacy context switches
The legacy context switch for ringbuffer submission is multistaged, where
each of those stages may fail. However, we were updating global state after
some stages, and so we had to force the incomplete request to be submitted
because we could not unwind. Save the global state before performing the
switches, and so enable us to unwind back to the previous global state
should any phase fail. We then must cancel the request instead of
submitting it should the construction fail.

v2: s/saved_ctx/from_ctx/; s/ctx/to_ctx/ etc.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171123152631.31385-1-chris@chris-wilson.co.uk
-rw-r--r--	drivers/gpu/drm/i915/i915_gem_context.c	173
-rw-r--r--	drivers/gpu/drm/i915/i915_gem_request.c	18
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.c	1
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.h	1
4 files changed, 65 insertions(+), 128 deletions(-)
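For illustration only (not part of the patch below): a minimal, self-contained
C sketch of the save-then-unwind pattern the commit message describes, where
the previous engine state is stashed up front and any failing stage restores
it via goto labels. The struct, stage stubs and names here are invented
stand-ins, not the real i915 API.

/* unwind_sketch.c — illustrative only; build with: cc -Wall unwind_sketch.c */
#include <stdio.h>

/* Simplified stand-ins for the engine's cached "global" state. */
struct engine_state {
	const char *active_context;
	const char *active_ppgtt;
};

/* Stage stubs: return 0 on success, a negative value on failure. */
static int do_switch_mm(const char *mm)    { return mm ? 0 : -1; }
static int do_set_context(const char *ctx) { return ctx ? 0 : -1; }
static int do_remap_l3(const char *ctx)    { (void)ctx; return 0; }

/*
 * Save the previous state before touching anything; each stage that
 * succeeds updates the engine, and a failing stage unwinds back to the
 * saved values in reverse order.
 */
static int switch_context(struct engine_state *engine,
			  const char *to_ctx, const char *to_mm)
{
	const char *from_ctx = engine->active_context;
	const char *from_mm = engine->active_ppgtt;
	int ret;

	ret = do_switch_mm(to_mm);
	if (ret)
		goto err;
	engine->active_ppgtt = to_mm;

	ret = do_set_context(to_ctx);
	if (ret)
		goto err_mm;
	engine->active_context = to_ctx;

	ret = do_remap_l3(to_ctx);
	if (ret)
		goto err_ctx;

	return 0;

err_ctx:
	engine->active_context = from_ctx;
err_mm:
	engine->active_ppgtt = from_mm;
err:
	return ret;
}

int main(void)
{
	struct engine_state engine = { "kernel_ctx", "aliasing_ppgtt" };

	/* A NULL context makes the second stage fail; state is unwound. */
	if (switch_context(&engine, NULL, "new_ppgtt"))
		printf("unwound to %s / %s\n",
		       engine.active_context, engine.active_ppgtt);
	return 0;
}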
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 6ca56e482d79..da4a64b3484c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -507,6 +507,7 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
 
 	for_each_engine(engine, dev_priv, id) {
 		engine->legacy_active_context = NULL;
+		engine->legacy_active_ppgtt = NULL;
 
 		if (!engine->last_retired_context)
 			continue;
@@ -681,68 +682,49 @@ static int remap_l3(struct drm_i915_gem_request *req, int slice)
 	return 0;
 }
 
-static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
-				   struct intel_engine_cs *engine,
-				   struct i915_gem_context *to)
-{
-	if (to->remap_slice)
-		return false;
-
-	if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
-		return false;
-
-	return to == engine->legacy_active_context;
-}
-
-static bool
-needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, struct intel_engine_cs *engine)
-{
-	struct i915_gem_context *from = engine->legacy_active_context;
-
-	if (!ppgtt)
-		return false;
-
-	/* Always load the ppgtt on first use */
-	if (!from)
-		return true;
-
-	/* Same context without new entries, skip */
-	if ((!from->ppgtt || from->ppgtt == ppgtt) &&
-	    !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
-		return false;
-
-	if (engine->id != RCS)
-		return true;
-
-	return true;
-}
-
-static int do_rcs_switch(struct drm_i915_gem_request *req)
+/**
+ * i915_switch_context() - perform a GPU context switch.
+ * @rq: request for which we'll execute the context switch
+ *
+ * The context life cycle is simple. The context refcount is incremented and
+ * decremented by 1 on create and destroy. If the context is in use by the GPU,
+ * it will have a refcount > 1. This allows us to destroy the context abstract
+ * object while letting the normal object tracking destroy the backing BO.
+ *
+ * This function should not be used in execlists mode. Instead the context is
+ * switched by writing to the ELSP and requests keep a reference to their
+ * context.
+ */
+int i915_switch_context(struct drm_i915_gem_request *rq)
 {
-	struct i915_gem_context *to = req->ctx;
-	struct intel_engine_cs *engine = req->engine;
-	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
-	struct i915_gem_context *from = engine->legacy_active_context;
-	u32 hw_flags;
+	struct intel_engine_cs *engine = rq->engine;
+	struct i915_gem_context *to_ctx = rq->ctx;
+	struct i915_hw_ppgtt *to_mm =
+		to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
+	struct i915_gem_context *from_ctx = engine->legacy_active_context;
+	struct i915_hw_ppgtt *from_mm = engine->legacy_active_ppgtt;
+	u32 hw_flags = 0;
 	int ret, i;
 
-	GEM_BUG_ON(engine->id != RCS);
+	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
-	if (skip_rcs_switch(ppgtt, engine, to))
-		return 0;
-
-	if (needs_pd_load_pre(ppgtt, engine)) {
-		/* Older GENs and non render rings still want the load first,
-		 * "PP_DCLV followed by PP_DIR_BASE register through Load
-		 * Register Immediate commands in Ring Buffer before submitting
-		 * a context."*/
-		trace_switch_mm(engine, to);
-		ret = ppgtt->switch_mm(ppgtt, req);
+	if (to_mm != from_mm ||
+	    (to_mm && intel_engine_flag(engine) & to_mm->pd_dirty_rings)) {
+		trace_switch_mm(engine, to_ctx);
+		ret = to_mm->switch_mm(to_mm, rq);
 		if (ret)
-			return ret;
+			goto err;
+
+		to_mm->pd_dirty_rings &= ~intel_engine_flag(engine);
+		engine->legacy_active_ppgtt = to_mm;
+		hw_flags = MI_FORCE_RESTORE;
 	}
 
-	if (i915_gem_context_is_kernel(to))
+	if (to_ctx->engine[engine->id].state &&
+	    (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) {
+		GEM_BUG_ON(engine->id != RCS);
+
 		/*
 		 * The kernel context(s) is treated as pure scratch and is not
 		 * expected to retain any state (as we sacrifice it during
@@ -750,78 +732,37 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 		 * as nothing actually executes using the kernel context; it
 		 * is purely used for flushing user contexts.
 		 */
-		hw_flags = MI_RESTORE_INHIBIT;
-	else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
-		hw_flags = MI_FORCE_RESTORE;
-	else
-		hw_flags = 0;
+		if (i915_gem_context_is_kernel(to_ctx))
+			hw_flags = MI_RESTORE_INHIBIT;
 
-	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
-		ret = mi_set_context(req, hw_flags);
+		ret = mi_set_context(rq, hw_flags);
 		if (ret)
-			return ret;
+			goto err_mm;
 
-		engine->legacy_active_context = to;
+		engine->legacy_active_context = to_ctx;
 	}
 
-	if (ppgtt)
-		ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
+	if (to_ctx->remap_slice) {
+		for (i = 0; i < MAX_L3_SLICES; i++) {
+			if (!(to_ctx->remap_slice & BIT(i)))
+				continue;
 
-	for (i = 0; i < MAX_L3_SLICES; i++) {
-		if (!(to->remap_slice & (1<<i)))
-			continue;
-
-		ret = remap_l3(req, i);
-		if (ret)
-			return ret;
-
-		to->remap_slice &= ~(1<<i);
-	}
-
-	return 0;
-}
-
-/**
- * i915_switch_context() - perform a GPU context switch.
- * @req: request for which we'll execute the context switch
- *
- * The context life cycle is simple. The context refcount is incremented and
- * decremented by 1 and create and destroy. If the context is in use by the GPU,
- * it will have a refcount > 1. This allows us to destroy the context abstract
- * object while letting the normal object tracking destroy the backing BO.
- *
- * This function should not be used in execlists mode. Instead the context is
- * switched by writing to the ELSP and requests keep a reference to their
- * context.
- */
-int i915_switch_context(struct drm_i915_gem_request *req)
-{
-	struct intel_engine_cs *engine = req->engine;
-
-	lockdep_assert_held(&req->i915->drm.struct_mutex);
-	GEM_BUG_ON(HAS_EXECLISTS(req->i915));
-
-	if (!req->ctx->engine[engine->id].state) {
-		struct i915_gem_context *to = req->ctx;
-		struct i915_hw_ppgtt *ppgtt =
-			to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
-
-		if (needs_pd_load_pre(ppgtt, engine)) {
-			int ret;
-
-			trace_switch_mm(engine, to);
-			ret = ppgtt->switch_mm(ppgtt, req);
+			ret = remap_l3(rq, i);
 			if (ret)
-				return ret;
-
-			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
+				goto err_ctx;
 		}
 
-		engine->legacy_active_context = to;
-		return 0;
+		to_ctx->remap_slice = 0;
 	}
 
-	return do_rcs_switch(req);
+	return 0;
+
+err_ctx:
+	engine->legacy_active_context = from_ctx;
+err_mm:
+	engine->legacy_active_ppgtt = from_mm;
+err:
+	return ret;
 }
 
 static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 7325469ce754..a90bdd26571f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -719,25 +719,19 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	/* Unconditionally invalidate GPU caches and TLBs. */
 	ret = engine->emit_flush(req, EMIT_INVALIDATE);
 	if (ret)
-		goto err_ctx;
+		goto err_unwind;
 
 	ret = engine->request_alloc(req);
-	if (ret) {
-		/*
-		 * Past the point-of-no-return. Since we may have updated
-		 * global state after partially completing the request alloc,
-		 * we need to commit any commands so far emitted in the
-		 * request to the HW.
-		 */
-		__i915_add_request(req, false);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto err_unwind;
 
 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
 	return req;
 
-err_ctx:
+err_unwind:
+	req->ring->emit = req->head;
+
 	/* Make sure we didn't add ourselves to external state before freeing */
 	GEM_BUG_ON(!list_empty(&req->active_list));
 	GEM_BUG_ON(!list_empty(&req->priotree.signalers_list));
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index bfa11a84e476..a904b0353bec 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -591,6 +591,7 @@ static void reset_ring_common(struct intel_engine_cs *engine,
 		request->ring->head = request->postfix;
 	} else {
 		engine->legacy_active_context = NULL;
+		engine->legacy_active_ppgtt = NULL;
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index d38d059285dc..ef304c99954c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -531,6 +531,7 @@ struct intel_engine_cs {
 	 * stream (ring).
 	 */
 	struct i915_gem_context *legacy_active_context;
+	struct i915_hw_ppgtt *legacy_active_ppgtt;
 
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;